Use BoringSSL implementation of BIGNUM (#2)

Vendor a limited section of BoringSSL instead of depending on the OpenSSL libs.
The main reason for doing this is that the LibreSSL BIGNUM implementation doesn't always generate the correct values.
Vendoring is based on the swift-crypto implementation.
Remove support for Swift 5.0
Adam Fowler 2020-05-06 18:11:24 +01:00 committed by GitHub
parent 7aa8e7c67d
commit f00adf94f4
205 changed files with 165846 additions and 85 deletions


@@ -18,7 +18,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
tag: ['5.0', '5.1', '5.2']
tag: ['5.1', '5.2']
container:
image: swift:${{ matrix.tag }}
steps:

.gitignore

@@ -4,3 +4,4 @@
/Packages
/*.xcodeproj
xcuserdata/
/.boringssl


@@ -1,4 +1,4 @@
// swift-tools-version:5.0
// swift-tools-version:5.1
// The swift-tools-version declares the minimum version of Swift required to build this package.
import PackageDescription
@@ -8,22 +8,14 @@ let package = Package(
products: [
// Products define the executables and libraries produced by a package, and make them visible to other packages.
.library(name: "BigNum", targets: ["BigNum"]),
/* This target is used only for symbol mangling. It's added and removed automatically because it emits build warnings. MANGLE_START
.library(name: "CBigNumBoringSSL", type: .static, targets: ["CBigNumBoringSSL"]),
MANGLE_END */
],
dependencies: [
// Dependencies declare other packages that this package depends on.
// .package(url: /* package url */, from: "1.0.0"),
],
dependencies: [],
targets: [
.target(name: "BigNum", dependencies: ["CBigNum"]),
.target(name: "CBigNum", dependencies: ["CBigNumOpenSSL"]),
.systemLibrary(
name: "CBigNumOpenSSL",
pkgConfig: "openssl",
providers: [
.apt(["openssl libssl-dev"]),
.brew(["openssl"])
]
),
.target(name: "BigNum", dependencies: ["CBigNumBoringSSL"]),
.target(name: "CBigNumBoringSSL"),
.testTarget(name: "BigNumTests", dependencies: ["BigNum"]),
]
)
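Nothing changes for packages that consume this library: they still depend only on the `BigNum` product, and the system OpenSSL requirement (pkg-config `openssl`, apt `libssl-dev`, brew `openssl`) disappears because `CBigNumBoringSSL` is built from the vendored sources. A minimal sketch of a consumer manifest, with a placeholder package name, URL and version:

// swift-tools-version:5.1
import PackageDescription

let package = Package(
    name: "MyApp",
    dependencies: [
        // Placeholder URL and version; point these at the real BigNum package.
        .package(url: "https://github.com/example/big-num.git", from: "1.0.0"),
    ],
    targets: [
        // Depending on the "BigNum" product is all that is required; the
        // vendored CBigNumBoringSSL target stays an internal detail.
        .target(name: "MyApp", dependencies: ["BigNum"]),
    ]
)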


@@ -4,51 +4,50 @@
/// Inspired by the implementation here https://github.com/Bouke/Bignum
///
import CBigNum
@_implementationOnly import CBigNumBoringSSL
import Foundation
/// Swift wrapper class for BIGNUM functions in OpenSSL library
public final class BigNum {
// ctx is an `OpaquePointer` because in OpenSSL 1.1 `BIGNUM` is an incomplete type. Still have to jump
// through hoops though because in other builds it is complete type and the compiler complains about
// casting to and from an OpaquePointer
// ctx is an `OpaquePointer` because CBigNumBoringSSL is imported as implementation-only. We still need
// to convert back and forth between `OpaquePointer` and `UnsafeMutablePointer<BIGNUM>`, though
internal let ctx: OpaquePointer?
public init() {
ctx = BN_new().convert()
ctx = CBigNumBoringSSL_BN_new().convert()
}
public init(_ int: Int) {
let ctx = BN_new()
let ctx = CBigNumBoringSSL_BN_new()
withUnsafePointer(to: int.bigEndian) { bytes in
let raw = UnsafeRawPointer(bytes)
let p = raw.bindMemory(to: UInt8.self, capacity: MemoryLayout<Int>.size)
BN_bin2bn(p, Int32(MemoryLayout<Int>.size), ctx)
CBigNumBoringSSL_BN_bin2bn(p, Int(MemoryLayout<Int>.size), ctx)
}
self.ctx = ctx!.convert()
}
public init?(_ dec: String) {
var ctx = BN_new()
if BN_dec2bn(&ctx, dec) == 0 {
var ctx = CBigNumBoringSSL_BN_new()
if CBigNumBoringSSL_BN_dec2bn(&ctx, dec) == 0 {
return nil
}
self.ctx = ctx!.convert()
}
public init?(hex: String) {
var ctx = BN_new()
if BN_hex2bn(&ctx, hex) == 0 {
var ctx = CBigNumBoringSSL_BN_new()
if CBigNumBoringSSL_BN_hex2bn(&ctx, hex) == 0 {
return nil
}
self.ctx = ctx!.convert()
}
public init<D: ContiguousBytes>(bytes: D) {
let ctx = BN_new()
let ctx = CBigNumBoringSSL_BN_new()
bytes.withUnsafeBytes { bytes in
if let p = bytes.baseAddress?.assumingMemoryBound(to: UInt8.self) {
BN_bin2bn(p, .init(bytes.count), ctx)
CBigNumBoringSSL_BN_bin2bn(p, .init(bytes.count), ctx)
}
}
self.ctx = ctx!.convert()
@@ -56,48 +55,48 @@ public final class BigNum {
@available(*, deprecated, message: "Please use init(bytes:) instead")
public init<D: DataProtocol>(data: D) {
let ctx = BN_new()
let ctx = CBigNumBoringSSL_BN_new()
if data.withContiguousStorageIfAvailable({bytes in
BN_bin2bn(bytes.baseAddress, .init(data.count), ctx)
CBigNumBoringSSL_BN_bin2bn(bytes.baseAddress, .init(data.count), ctx)
}) == nil {
var buffer = UnsafeMutableBufferPointer<UInt8>.allocate(capacity: data.count)
data.copyBytes(to: buffer)
defer { buffer.deallocate() }
BN_bin2bn(buffer.baseAddress, .init(data.count), ctx)
CBigNumBoringSSL_BN_bin2bn(buffer.baseAddress, .init(data.count), ctx)
}
self.ctx = ctx!.convert()
}
deinit {
BN_free(ctx?.convert())
CBigNumBoringSSL_BN_free(ctx?.convert())
}
public var data: Data {
var data = Data(count: Int((BN_num_bits(ctx?.convert()) + 7) / 8))
var data = Data(count: Int((CBigNumBoringSSL_BN_num_bits(ctx?.convert()) + 7) / 8))
_ = data.withUnsafeMutableBytes { bytes in
if let p = bytes.baseAddress?.assumingMemoryBound(to: UInt8.self) {
BN_bn2bin(ctx?.convert(), p)
CBigNumBoringSSL_BN_bn2bin(ctx?.convert(), p)
}
}
return data
}
public var bytes: [UInt8] {
var bytes = [UInt8].init(repeating: 0, count: Int((BN_num_bits(ctx?.convert()) + 7) / 8))
var bytes = [UInt8].init(repeating: 0, count: Int((CBigNumBoringSSL_BN_num_bits(ctx?.convert()) + 7) / 8))
_ = bytes.withUnsafeMutableBytes { bytes in
if let p = bytes.baseAddress?.assumingMemoryBound(to: UInt8.self) {
BN_bn2bin(ctx?.convert(), p)
CBigNumBoringSSL_BN_bn2bin(ctx?.convert(), p)
}
}
return bytes
}
public var dec: String {
return String(validatingUTF8: BN_bn2dec(ctx?.convert()))!
return String(validatingUTF8: CBigNumBoringSSL_BN_bn2dec(ctx?.convert()))!
}
public var hex: String {
return String(validatingUTF8: BN_bn2hex(ctx?.convert()))!
return String(validatingUTF8: CBigNumBoringSSL_BN_bn2hex(ctx?.convert()))!
}
}
@@ -109,11 +108,11 @@ extension BigNum: CustomStringConvertible {
extension BigNum: Comparable {
public static func == (lhs: BigNum, rhs: BigNum) -> Bool {
return BN_cmp(lhs.ctx?.convert(), rhs.ctx?.convert()) == 0
return CBigNumBoringSSL_BN_cmp(lhs.ctx?.convert(), rhs.ctx?.convert()) == 0
}
public static func < (lhs: BigNum, rhs: BigNum) -> Bool {
return BN_cmp(lhs.ctx?.convert(), rhs.ctx?.convert()) == -1
return CBigNumBoringSSL_BN_cmp(lhs.ctx?.convert(), rhs.ctx?.convert()) == -1
}
}
@@ -136,56 +135,56 @@ extension BigNum {
static func operationWithCtx(_ block: (BigNum, OpaquePointer?) -> Int32) -> BigNum {
let result = BigNum()
let context = BN_CTX_new()
let context = CBigNumBoringSSL_BN_CTX_new()
precondition(block(result, context) == 1)
BN_CTX_free(context)
CBigNumBoringSSL_BN_CTX_free(context)
return result
}
}
public func + (lhs: BigNum, rhs: BigNum) -> BigNum {
return BigNum.operation {
BN_add($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
CBigNumBoringSSL_BN_add($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
}
}
public func - (lhs: BigNum, rhs: BigNum) -> BigNum {
return BigNum.operation {
BN_sub($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
CBigNumBoringSSL_BN_sub($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
}
}
public func * (lhs: BigNum, rhs: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_mul($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
CBigNumBoringSSL_BN_mul($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
}
}
/// Returns lhs / rhs, rounded to zero.
public func / (lhs: BigNum, rhs: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_div($0.ctx?.convert(), nil, lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
CBigNumBoringSSL_BN_div($0.ctx?.convert(), nil, lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
}
}
/// Returns the remainder of lhs divided by rhs.
public func % (lhs: BigNum, rhs: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_div(nil, $0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
CBigNumBoringSSL_BN_div(nil, $0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
}
}
/// right shift
public func >> (lhs: BigNum, shift: Int32) -> BigNum {
return BigNum.operation {
BN_rshift($0.ctx?.convert(), lhs.ctx?.convert(), shift)
CBigNumBoringSSL_BN_rshift($0.ctx?.convert(), lhs.ctx?.convert(), shift)
}
}
/// left shift
public func << (lhs: BigNum, shift: Int32) -> BigNum {
return BigNum.operation {
BN_lshift($0.ctx?.convert(), lhs.ctx?.convert(), shift)
CBigNumBoringSSL_BN_lshift($0.ctx?.convert(), lhs.ctx?.convert(), shift)
}
}
@@ -195,111 +194,111 @@ public extension BigNum {
static func += (lhs: inout BigNum, rhs: BigNum) {
lhs = BigNum.operation {
BN_add($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
CBigNumBoringSSL_BN_add($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
}
}
static func -= (lhs: inout BigNum, rhs: BigNum) {
lhs = BigNum.operation {
BN_sub($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
CBigNumBoringSSL_BN_sub($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
}
}
static func *= (lhs: inout BigNum, rhs: BigNum) {
lhs = BigNum.operationWithCtx {
BN_mul($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
CBigNumBoringSSL_BN_mul($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
}
}
static func /= (lhs: inout BigNum, rhs: BigNum) {
lhs = BigNum.operationWithCtx {
BN_div($0.ctx?.convert(), nil, lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
CBigNumBoringSSL_BN_div($0.ctx?.convert(), nil, lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
}
}
static func %= (lhs: inout BigNum, rhs: BigNum) {
lhs = BigNum.operationWithCtx {
BN_div(nil, $0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
CBigNumBoringSSL_BN_div(nil, $0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
}
}
/// Returns: (self ** 2)
func sqr() -> BigNum {
return BigNum.operationWithCtx {
BN_sqr($0.ctx?.convert(), self.ctx?.convert(), $1)
CBigNumBoringSSL_BN_sqr($0.ctx?.convert(), self.ctx?.convert(), $1)
}
}
/// Returns: (self ** p)
func power(_ p: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_exp($0.ctx?.convert(), self.ctx?.convert(), p.ctx?.convert(), $1)
CBigNumBoringSSL_BN_exp($0.ctx?.convert(), self.ctx?.convert(), p.ctx?.convert(), $1)
}
}
/// Returns: (self + b) % N
func add(_ b: BigNum, modulus: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_mod_add($0.ctx?.convert(), self.ctx?.convert(), b.ctx?.convert(), modulus.ctx?.convert(), $1)
CBigNumBoringSSL_BN_mod_add($0.ctx?.convert(), self.ctx?.convert(), b.ctx?.convert(), modulus.ctx?.convert(), $1)
}
}
/// Returns: (a - b) % N
func sub(_ b: BigNum, modulus: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_mod_sub($0.ctx?.convert(), self.ctx?.convert(), b.ctx?.convert(), modulus.ctx?.convert(), $1)
CBigNumBoringSSL_BN_mod_sub($0.ctx?.convert(), self.ctx?.convert(), b.ctx?.convert(), modulus.ctx?.convert(), $1)
}
}
/// Returns: (a * b) % N
func mul(_ b: BigNum, modulus: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_mod_mul($0.ctx?.convert(), self.ctx?.convert(), b.ctx?.convert(), modulus.ctx?.convert(), $1)
CBigNumBoringSSL_BN_mod_mul($0.ctx?.convert(), self.ctx?.convert(), b.ctx?.convert(), modulus.ctx?.convert(), $1)
}
}
/// Returns: (a ** 2) % N
func sqr(modulus: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_mod_sqr($0.ctx?.convert(), self.ctx?.convert(), modulus.ctx?.convert(), $1)
CBigNumBoringSSL_BN_mod_sqr($0.ctx?.convert(), self.ctx?.convert(), modulus.ctx?.convert(), $1)
}
}
/// Returns: (a ** p) % N
func power(_ p: BigNum, modulus: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_mod_exp($0.ctx?.convert(), self.ctx?.convert(), p.ctx?.convert(), modulus.ctx?.convert(), $1)
CBigNumBoringSSL_BN_mod_exp($0.ctx?.convert(), self.ctx?.convert(), p.ctx?.convert(), modulus.ctx?.convert(), $1)
}
}
/// Returns the greatest common divisor
static func gcd(_ first: BigNum, _ second: BigNum) -> BigNum {
return operationWithCtx {
BN_gcd($0.ctx?.convert(), first.ctx?.convert(), second.ctx?.convert(), $1)
CBigNumBoringSSL_BN_gcd($0.ctx?.convert(), first.ctx?.convert(), second.ctx?.convert(), $1)
}
}
/// Bitwise operations
func setBit(_ bit: Int32) {
BN_set_bit(self.ctx?.convert(), bit)
CBigNumBoringSSL_BN_set_bit(self.ctx?.convert(), bit)
}
func clearBit(_ bit: Int32) {
BN_clear_bit(self.ctx?.convert(), bit)
CBigNumBoringSSL_BN_clear_bit(self.ctx?.convert(), bit)
}
func mask(_ bits: Int32) {
BN_mask_bits(self.ctx?.convert(), bits)
CBigNumBoringSSL_BN_mask_bits(self.ctx?.convert(), bits)
}
func isBitSet(_ bit: Int32) -> Bool {
let set = BN_is_bit_set(self.ctx?.convert(), bit)
let set = CBigNumBoringSSL_BN_is_bit_set(self.ctx?.convert(), bit)
return set == 1 ? true : false
}
func numBits() -> Int32 {
return BN_num_bits(self.ctx?.convert())
func numBits() -> UInt32 {
return CBigNumBoringSSL_BN_num_bits(self.ctx?.convert())
}
/// random number generators
@@ -313,45 +312,45 @@ public extension BigNum {
/// Returns a cryptographically strong random number of at most `bits` bits. The random number generator needs to be seeded before calling this.
static func random(bits: Int32, top: Top = .any, odd: Bool = false) -> BigNum {
return operation {
BN_rand($0.ctx?.convert(), bits, top.rawValue, odd ? 1 : 0)
CBigNumBoringSSL_BN_rand($0.ctx?.convert(), bits, top.rawValue, odd ? 1 : 0)
}
}
/// return pseudo random number of maximum size defined in bits.
static func psuedo_random(bits: Int32, top: Top = .any, odd: Bool = false) -> BigNum {
return operation {
BN_pseudo_rand($0.ctx?.convert(), bits, top.rawValue, odd ? 1 : 0)
CBigNumBoringSSL_BN_pseudo_rand($0.ctx?.convert(), bits, top.rawValue, odd ? 1 : 0)
}
}
/// Returns a cryptographically strong random number in the range (0...max-1). The random number generator needs to be seeded before calling this.
static func random(max: BigNum) -> BigNum {
return operation {
BN_rand_range($0.ctx?.convert(), max.ctx?.convert())
CBigNumBoringSSL_BN_rand_range($0.ctx?.convert(), max.ctx?.convert())
}
}
/// return pseudo random number in range (0..<max)
static func psuedo_random(max: BigNum) -> BigNum {
return operation {
BN_pseudo_rand_range($0.ctx?.convert(), max.ctx?.convert())
CBigNumBoringSSL_BN_pseudo_rand_range($0.ctx?.convert(), max.ctx?.convert())
}
}
/// prime number generator
static func generatePrime(bitSize: Int32, safe: Bool, add: BigNum? = nil, remainder: BigNum? = nil) -> BigNum {
return operation {
BN_generate_prime_ex($0.ctx?.convert(), bitSize, safe ? 1 : 0, add?.ctx?.convert(), remainder?.ctx?.convert(), nil)
CBigNumBoringSSL_BN_generate_prime_ex($0.ctx?.convert(), bitSize, safe ? 1 : 0, add?.ctx?.convert(), remainder?.ctx?.convert(), nil)
}
}
/// primality test
func isPrime(numChecks: Int32) -> Bool {
let context = BN_CTX_new()
let context = CBigNumBoringSSL_BN_CTX_new()
defer {
BN_CTX_free(context)
CBigNumBoringSSL_BN_CTX_free(context)
}
return BN_is_prime_ex(self.ctx?.convert(), numChecks, context, nil) == 1
return CBigNumBoringSSL_BN_is_prime_ex(self.ctx?.convert(), numChecks, context, nil) == 1
}
}
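The comment near the top of this file refers to converting between `OpaquePointer` and `UnsafeMutablePointer<BIGNUM>`. A minimal sketch of what such `convert()` helpers can look like (hypothetical; the package's actual helpers may differ slightly), using only the standard library's pointer initializers:

extension OpaquePointer {
    // Reinterpret the opaque handle as a typed pointer; the destination
    // type (e.g. BIGNUM) is inferred at the call site.
    func convert<T>() -> UnsafeMutablePointer<T> {
        return UnsafeMutablePointer<T>(self)
    }
}

extension UnsafeMutablePointer {
    // Wrap a typed pointer back up as an opaque handle for storage.
    func convert() -> OpaquePointer {
        return OpaquePointer(self)
    }
}

With these in place, a call such as `CBigNumBoringSSL_BN_free(ctx?.convert())` type-checks because the compiler infers `UnsafeMutablePointer<BIGNUM>` from the parameter type that `BN_free` expects.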


@@ -1 +0,0 @@
#include "include/c_big_num.h"


@@ -1 +0,0 @@
#include <openssl/bn.h>


@@ -0,0 +1,700 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bio.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <CBigNumBoringSSL_asn1.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_thread.h>
#include "../internal.h"
BIO *BIO_new(const BIO_METHOD *method) {
BIO *ret = OPENSSL_malloc(sizeof(BIO));
if (ret == NULL) {
OPENSSL_PUT_ERROR(BIO, ERR_R_MALLOC_FAILURE);
return NULL;
}
OPENSSL_memset(ret, 0, sizeof(BIO));
ret->method = method;
ret->shutdown = 1;
ret->references = 1;
if (method->create != NULL && !method->create(ret)) {
OPENSSL_free(ret);
return NULL;
}
return ret;
}
int BIO_free(BIO *bio) {
BIO *next_bio;
for (; bio != NULL; bio = next_bio) {
if (!CRYPTO_refcount_dec_and_test_zero(&bio->references)) {
return 0;
}
next_bio = BIO_pop(bio);
if (bio->method != NULL && bio->method->destroy != NULL) {
bio->method->destroy(bio);
}
OPENSSL_free(bio);
}
return 1;
}
int BIO_up_ref(BIO *bio) {
CRYPTO_refcount_inc(&bio->references);
return 1;
}
void BIO_vfree(BIO *bio) {
BIO_free(bio);
}
void BIO_free_all(BIO *bio) {
BIO_free(bio);
}
int BIO_read(BIO *bio, void *buf, int len) {
if (bio == NULL || bio->method == NULL || bio->method->bread == NULL) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
return -2;
}
if (!bio->init) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNINITIALIZED);
return -2;
}
if (len <= 0) {
return 0;
}
int ret = bio->method->bread(bio, buf, len);
if (ret > 0) {
bio->num_read += ret;
}
return ret;
}
int BIO_gets(BIO *bio, char *buf, int len) {
if (bio == NULL || bio->method == NULL || bio->method->bgets == NULL) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
return -2;
}
if (!bio->init) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNINITIALIZED);
return -2;
}
if (len <= 0) {
return 0;
}
int ret = bio->method->bgets(bio, buf, len);
if (ret > 0) {
bio->num_read += ret;
}
return ret;
}
int BIO_write(BIO *bio, const void *in, int inl) {
if (bio == NULL || bio->method == NULL || bio->method->bwrite == NULL) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
return -2;
}
if (!bio->init) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNINITIALIZED);
return -2;
}
if (inl <= 0) {
return 0;
}
int ret = bio->method->bwrite(bio, in, inl);
if (ret > 0) {
bio->num_write += ret;
}
return ret;
}
int BIO_write_all(BIO *bio, const void *data, size_t len) {
const uint8_t *data_u8 = data;
while (len > 0) {
int ret = BIO_write(bio, data_u8, len > INT_MAX ? INT_MAX : (int)len);
if (ret <= 0) {
return 0;
}
data_u8 += ret;
len -= ret;
}
return 1;
}
int BIO_puts(BIO *bio, const char *in) {
return BIO_write(bio, in, strlen(in));
}
int BIO_flush(BIO *bio) {
return BIO_ctrl(bio, BIO_CTRL_FLUSH, 0, NULL);
}
long BIO_ctrl(BIO *bio, int cmd, long larg, void *parg) {
if (bio == NULL) {
return 0;
}
if (bio->method == NULL || bio->method->ctrl == NULL) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
return -2;
}
return bio->method->ctrl(bio, cmd, larg, parg);
}
char *BIO_ptr_ctrl(BIO *b, int cmd, long larg) {
char *p = NULL;
if (BIO_ctrl(b, cmd, larg, (void *)&p) <= 0) {
return NULL;
}
return p;
}
long BIO_int_ctrl(BIO *b, int cmd, long larg, int iarg) {
int i = iarg;
return BIO_ctrl(b, cmd, larg, (void *)&i);
}
int BIO_reset(BIO *bio) {
return BIO_ctrl(bio, BIO_CTRL_RESET, 0, NULL);
}
int BIO_eof(BIO *bio) {
return BIO_ctrl(bio, BIO_CTRL_EOF, 0, NULL);
}
void BIO_set_flags(BIO *bio, int flags) {
bio->flags |= flags;
}
int BIO_test_flags(const BIO *bio, int flags) {
return bio->flags & flags;
}
int BIO_should_read(const BIO *bio) {
return BIO_test_flags(bio, BIO_FLAGS_READ);
}
int BIO_should_write(const BIO *bio) {
return BIO_test_flags(bio, BIO_FLAGS_WRITE);
}
int BIO_should_retry(const BIO *bio) {
return BIO_test_flags(bio, BIO_FLAGS_SHOULD_RETRY);
}
int BIO_should_io_special(const BIO *bio) {
return BIO_test_flags(bio, BIO_FLAGS_IO_SPECIAL);
}
int BIO_get_retry_reason(const BIO *bio) { return bio->retry_reason; }
void BIO_clear_flags(BIO *bio, int flags) {
bio->flags &= ~flags;
}
void BIO_set_retry_read(BIO *bio) {
bio->flags |= BIO_FLAGS_READ | BIO_FLAGS_SHOULD_RETRY;
}
void BIO_set_retry_write(BIO *bio) {
bio->flags |= BIO_FLAGS_WRITE | BIO_FLAGS_SHOULD_RETRY;
}
static const int kRetryFlags = BIO_FLAGS_RWS | BIO_FLAGS_SHOULD_RETRY;
int BIO_get_retry_flags(BIO *bio) {
return bio->flags & kRetryFlags;
}
void BIO_clear_retry_flags(BIO *bio) {
bio->flags &= ~kRetryFlags;
bio->retry_reason = 0;
}
int BIO_method_type(const BIO *bio) { return bio->method->type; }
void BIO_copy_next_retry(BIO *bio) {
BIO_clear_retry_flags(bio);
BIO_set_flags(bio, BIO_get_retry_flags(bio->next_bio));
bio->retry_reason = bio->next_bio->retry_reason;
}
long BIO_callback_ctrl(BIO *bio, int cmd, bio_info_cb fp) {
if (bio == NULL) {
return 0;
}
if (bio->method == NULL || bio->method->callback_ctrl == NULL) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
return 0;
}
return bio->method->callback_ctrl(bio, cmd, fp);
}
size_t BIO_pending(const BIO *bio) {
const long r = BIO_ctrl((BIO *) bio, BIO_CTRL_PENDING, 0, NULL);
assert(r >= 0);
if (r < 0) {
return 0;
}
return r;
}
size_t BIO_ctrl_pending(const BIO *bio) {
return BIO_pending(bio);
}
size_t BIO_wpending(const BIO *bio) {
const long r = BIO_ctrl((BIO *) bio, BIO_CTRL_WPENDING, 0, NULL);
assert(r >= 0);
if (r < 0) {
return 0;
}
return r;
}
int BIO_set_close(BIO *bio, int close_flag) {
return BIO_ctrl(bio, BIO_CTRL_SET_CLOSE, close_flag, NULL);
}
OPENSSL_EXPORT size_t BIO_number_read(const BIO *bio) {
return bio->num_read;
}
OPENSSL_EXPORT size_t BIO_number_written(const BIO *bio) {
return bio->num_write;
}
BIO *BIO_push(BIO *bio, BIO *appended_bio) {
BIO *last_bio;
if (bio == NULL) {
return bio;
}
last_bio = bio;
while (last_bio->next_bio != NULL) {
last_bio = last_bio->next_bio;
}
last_bio->next_bio = appended_bio;
return bio;
}
BIO *BIO_pop(BIO *bio) {
BIO *ret;
if (bio == NULL) {
return NULL;
}
ret = bio->next_bio;
bio->next_bio = NULL;
return ret;
}
BIO *BIO_next(BIO *bio) {
if (!bio) {
return NULL;
}
return bio->next_bio;
}
BIO *BIO_find_type(BIO *bio, int type) {
int method_type, mask;
if (!bio) {
return NULL;
}
mask = type & 0xff;
do {
if (bio->method != NULL) {
method_type = bio->method->type;
if (!mask) {
if (method_type & type) {
return bio;
}
} else if (method_type == type) {
return bio;
}
}
bio = bio->next_bio;
} while (bio != NULL);
return NULL;
}
int BIO_indent(BIO *bio, unsigned indent, unsigned max_indent) {
if (indent > max_indent) {
indent = max_indent;
}
while (indent--) {
if (BIO_puts(bio, " ") != 1) {
return 0;
}
}
return 1;
}
static int print_bio(const char *str, size_t len, void *bio) {
return BIO_write((BIO *)bio, str, len);
}
void ERR_print_errors(BIO *bio) {
ERR_print_errors_cb(print_bio, bio);
}
// bio_read_all reads everything from |bio| and prepends |prefix| to it. On
// success, |*out| is set to an allocated buffer (which should be freed with
// |OPENSSL_free|), |*out_len| is set to its length and one is returned. The
// buffer will contain |prefix| followed by the contents of |bio|. On failure,
// zero is returned.
//
// The function will fail if the size of the output would equal or exceed
// |max_len|.
static int bio_read_all(BIO *bio, uint8_t **out, size_t *out_len,
const uint8_t *prefix, size_t prefix_len,
size_t max_len) {
static const size_t kChunkSize = 4096;
size_t len = prefix_len + kChunkSize;
if (len > max_len) {
len = max_len;
}
if (len < prefix_len) {
return 0;
}
*out = OPENSSL_malloc(len);
if (*out == NULL) {
return 0;
}
OPENSSL_memcpy(*out, prefix, prefix_len);
size_t done = prefix_len;
for (;;) {
if (done == len) {
OPENSSL_free(*out);
return 0;
}
const size_t todo = len - done;
assert(todo < INT_MAX);
const int n = BIO_read(bio, *out + done, todo);
if (n == 0) {
*out_len = done;
return 1;
} else if (n == -1) {
OPENSSL_free(*out);
return 0;
}
done += n;
if (len < max_len && len - done < kChunkSize / 2) {
len += kChunkSize;
if (len < kChunkSize || len > max_len) {
len = max_len;
}
uint8_t *new_buf = OPENSSL_realloc(*out, len);
if (new_buf == NULL) {
OPENSSL_free(*out);
return 0;
}
*out = new_buf;
}
}
}
// bio_read_full reads |len| bytes |bio| and writes them into |out|. It
// tolerates partial reads from |bio| and returns one on success or zero if a
// read fails before |len| bytes are read. On failure, it additionally sets
// |*out_eof_on_first_read| to whether the error was due to |bio| returning zero
// on the first read. |out_eof_on_first_read| may be NULL to discard the value.
static int bio_read_full(BIO *bio, uint8_t *out, int *out_eof_on_first_read,
size_t len) {
int first_read = 1;
while (len > 0) {
int todo = len <= INT_MAX ? (int)len : INT_MAX;
int ret = BIO_read(bio, out, todo);
if (ret <= 0) {
if (out_eof_on_first_read != NULL) {
*out_eof_on_first_read = first_read && ret == 0;
}
return 0;
}
out += ret;
len -= (size_t)ret;
first_read = 0;
}
return 1;
}
// For compatibility with existing |d2i_*_bio| callers, |BIO_read_asn1| uses
// |ERR_LIB_ASN1| errors.
OPENSSL_DECLARE_ERROR_REASON(ASN1, ASN1_R_DECODE_ERROR)
OPENSSL_DECLARE_ERROR_REASON(ASN1, ASN1_R_HEADER_TOO_LONG)
OPENSSL_DECLARE_ERROR_REASON(ASN1, ASN1_R_NOT_ENOUGH_DATA)
OPENSSL_DECLARE_ERROR_REASON(ASN1, ASN1_R_TOO_LONG)
int BIO_read_asn1(BIO *bio, uint8_t **out, size_t *out_len, size_t max_len) {
uint8_t header[6];
static const size_t kInitialHeaderLen = 2;
int eof_on_first_read;
if (!bio_read_full(bio, header, &eof_on_first_read, kInitialHeaderLen)) {
if (eof_on_first_read) {
// Historically, OpenSSL returned |ASN1_R_HEADER_TOO_LONG| when
// |d2i_*_bio| could not read anything. CPython conditions on this to
// determine if |bio| was empty.
OPENSSL_PUT_ERROR(ASN1, ASN1_R_HEADER_TOO_LONG);
} else {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_NOT_ENOUGH_DATA);
}
return 0;
}
const uint8_t tag = header[0];
const uint8_t length_byte = header[1];
if ((tag & 0x1f) == 0x1f) {
// Long form tags are not supported.
OPENSSL_PUT_ERROR(ASN1, ASN1_R_DECODE_ERROR);
return 0;
}
size_t len, header_len;
if ((length_byte & 0x80) == 0) {
// Short form length.
len = length_byte;
header_len = kInitialHeaderLen;
} else {
const size_t num_bytes = length_byte & 0x7f;
if ((tag & 0x20 /* constructed */) != 0 && num_bytes == 0) {
// indefinite length.
if (!bio_read_all(bio, out, out_len, header, kInitialHeaderLen,
max_len)) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_NOT_ENOUGH_DATA);
return 0;
}
return 1;
}
if (num_bytes == 0 || num_bytes > 4) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_DECODE_ERROR);
return 0;
}
if (!bio_read_full(bio, header + kInitialHeaderLen, NULL, num_bytes)) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_NOT_ENOUGH_DATA);
return 0;
}
header_len = kInitialHeaderLen + num_bytes;
uint32_t len32 = 0;
for (unsigned i = 0; i < num_bytes; i++) {
len32 <<= 8;
len32 |= header[kInitialHeaderLen + i];
}
if (len32 < 128) {
// Length should have used short-form encoding.
OPENSSL_PUT_ERROR(ASN1, ASN1_R_DECODE_ERROR);
return 0;
}
if ((len32 >> ((num_bytes-1)*8)) == 0) {
// Length should have been at least one byte shorter.
OPENSSL_PUT_ERROR(ASN1, ASN1_R_DECODE_ERROR);
return 0;
}
len = len32;
}
if (len + header_len < len ||
len + header_len > max_len ||
len > INT_MAX) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_TOO_LONG);
return 0;
}
len += header_len;
*out_len = len;
*out = OPENSSL_malloc(len);
if (*out == NULL) {
OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE);
return 0;
}
OPENSSL_memcpy(*out, header, header_len);
if (!bio_read_full(bio, (*out) + header_len, NULL, len - header_len)) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_NOT_ENOUGH_DATA);
OPENSSL_free(*out);
return 0;
}
return 1;
}
void BIO_set_retry_special(BIO *bio) {
bio->flags |= BIO_FLAGS_READ | BIO_FLAGS_IO_SPECIAL;
}
int BIO_set_write_buffer_size(BIO *bio, int buffer_size) { return 0; }
static struct CRYPTO_STATIC_MUTEX g_index_lock = CRYPTO_STATIC_MUTEX_INIT;
static int g_index = BIO_TYPE_START;
int BIO_get_new_index(void) {
CRYPTO_STATIC_MUTEX_lock_write(&g_index_lock);
// If |g_index| exceeds 255, it will collide with the flags bits.
int ret = g_index > 255 ? -1 : g_index++;
CRYPTO_STATIC_MUTEX_unlock_write(&g_index_lock);
return ret;
}
BIO_METHOD *BIO_meth_new(int type, const char *name) {
BIO_METHOD *method = OPENSSL_malloc(sizeof(BIO_METHOD));
if (method == NULL) {
return NULL;
}
OPENSSL_memset(method, 0, sizeof(BIO_METHOD));
method->type = type;
method->name = name;
return method;
}
void BIO_meth_free(BIO_METHOD *method) {
OPENSSL_free(method);
}
int BIO_meth_set_create(BIO_METHOD *method,
int (*create)(BIO *)) {
method->create = create;
return 1;
}
int BIO_meth_set_destroy(BIO_METHOD *method,
int (*destroy)(BIO *)) {
method->destroy = destroy;
return 1;
}
int BIO_meth_set_write(BIO_METHOD *method,
int (*write)(BIO *, const char *, int)) {
method->bwrite = write;
return 1;
}
int BIO_meth_set_read(BIO_METHOD *method,
int (*read)(BIO *, char *, int)) {
method->bread = read;
return 1;
}
int BIO_meth_set_gets(BIO_METHOD *method,
int (*gets)(BIO *, char *, int)) {
method->bgets = gets;
return 1;
}
int BIO_meth_set_ctrl(BIO_METHOD *method,
long (*ctrl)(BIO *, int, long, void *)) {
method->ctrl = ctrl;
return 1;
}
void BIO_set_data(BIO *bio, void *ptr) { bio->ptr = ptr; }
void *BIO_get_data(BIO *bio) { return bio->ptr; }
void BIO_set_init(BIO *bio, int init) { bio->init = init; }
int BIO_get_init(BIO *bio) { return bio->init; }
void BIO_set_shutdown(BIO *bio, int shutdown) { bio->shutdown = shutdown; }
int BIO_get_shutdown(BIO *bio) { return bio->shutdown; }
int BIO_meth_set_puts(BIO_METHOD *method, int (*puts)(BIO *, const char *)) {
// Ignore the parameter. We implement |BIO_puts| using |BIO_write|.
return 1;
}


@@ -0,0 +1,317 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#if defined(__linux) || defined(__sun) || defined(__hpux)
// Following definition aliases fopen to fopen64 on above mentioned
// platforms. This makes it possible to open and sequentially access
// files larger than 2GB from 32-bit application. It does not allow to
// traverse them beyond 2GB with fseek/ftell, but on the other hand *no*
// 32-bit platform permits that, not with fseek/ftell. Not to mention
// that breaking 2GB limit for seeking would require surgery to *our*
// API. But sequential access suffices for practical cases when you
// can run into large files, such as fingerprinting, so we can let API
// alone. For reference, the list of 32-bit platforms which allow for
// sequential access of large files without extra "magic" comprise *BSD,
// Darwin, IRIX...
#ifndef _FILE_OFFSET_BITS
#define _FILE_OFFSET_BITS 64
#endif
#endif
#include <CBigNumBoringSSL_bio.h>
#if !defined(OPENSSL_TRUSTY)
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include "../internal.h"
#define BIO_FP_READ 0x02
#define BIO_FP_WRITE 0x04
#define BIO_FP_APPEND 0x08
BIO *BIO_new_file(const char *filename, const char *mode) {
BIO *ret;
FILE *file;
file = fopen(filename, mode);
if (file == NULL) {
OPENSSL_PUT_SYSTEM_ERROR();
ERR_add_error_data(5, "fopen('", filename, "','", mode, "')");
if (errno == ENOENT) {
OPENSSL_PUT_ERROR(BIO, BIO_R_NO_SUCH_FILE);
} else {
OPENSSL_PUT_ERROR(BIO, BIO_R_SYS_LIB);
}
return NULL;
}
ret = BIO_new_fp(file, BIO_CLOSE);
if (ret == NULL) {
fclose(file);
return NULL;
}
return ret;
}
BIO *BIO_new_fp(FILE *stream, int close_flag) {
BIO *ret = BIO_new(BIO_s_file());
if (ret == NULL) {
return NULL;
}
BIO_set_fp(ret, stream, close_flag);
return ret;
}
static int file_new(BIO *bio) { return 1; }
static int file_free(BIO *bio) {
if (bio == NULL) {
return 0;
}
if (!bio->shutdown) {
return 1;
}
if (bio->init && bio->ptr != NULL) {
fclose(bio->ptr);
bio->ptr = NULL;
}
bio->init = 0;
return 1;
}
static int file_read(BIO *b, char *out, int outl) {
if (!b->init) {
return 0;
}
size_t ret = fread(out, 1, outl, (FILE *)b->ptr);
if (ret == 0 && ferror((FILE *)b->ptr)) {
OPENSSL_PUT_SYSTEM_ERROR();
OPENSSL_PUT_ERROR(BIO, ERR_R_SYS_LIB);
return -1;
}
// fread reads at most |outl| bytes, so |ret| fits in an int.
return (int)ret;
}
static int file_write(BIO *b, const char *in, int inl) {
int ret = 0;
if (!b->init) {
return 0;
}
ret = fwrite(in, inl, 1, (FILE *)b->ptr);
if (ret > 0) {
ret = inl;
}
return ret;
}
static long file_ctrl(BIO *b, int cmd, long num, void *ptr) {
long ret = 1;
FILE *fp = (FILE *)b->ptr;
FILE **fpp;
char p[4];
switch (cmd) {
case BIO_CTRL_RESET:
num = 0;
OPENSSL_FALLTHROUGH;
case BIO_C_FILE_SEEK:
ret = (long)fseek(fp, num, 0);
break;
case BIO_CTRL_EOF:
ret = (long)feof(fp);
break;
case BIO_C_FILE_TELL:
case BIO_CTRL_INFO:
ret = ftell(fp);
break;
case BIO_C_SET_FILE_PTR:
file_free(b);
b->shutdown = (int)num & BIO_CLOSE;
b->ptr = ptr;
b->init = 1;
break;
case BIO_C_SET_FILENAME:
file_free(b);
b->shutdown = (int)num & BIO_CLOSE;
if (num & BIO_FP_APPEND) {
if (num & BIO_FP_READ) {
OPENSSL_strlcpy(p, "a+", sizeof(p));
} else {
OPENSSL_strlcpy(p, "a", sizeof(p));
}
} else if ((num & BIO_FP_READ) && (num & BIO_FP_WRITE)) {
OPENSSL_strlcpy(p, "r+", sizeof(p));
} else if (num & BIO_FP_WRITE) {
OPENSSL_strlcpy(p, "w", sizeof(p));
} else if (num & BIO_FP_READ) {
OPENSSL_strlcpy(p, "r", sizeof(p));
} else {
OPENSSL_PUT_ERROR(BIO, BIO_R_BAD_FOPEN_MODE);
ret = 0;
break;
}
fp = fopen(ptr, p);
if (fp == NULL) {
OPENSSL_PUT_SYSTEM_ERROR();
ERR_add_error_data(5, "fopen('", ptr, "','", p, "')");
OPENSSL_PUT_ERROR(BIO, ERR_R_SYS_LIB);
ret = 0;
break;
}
b->ptr = fp;
b->init = 1;
break;
case BIO_C_GET_FILE_PTR:
// the ptr parameter is actually a FILE ** in this case.
if (ptr != NULL) {
fpp = (FILE **)ptr;
*fpp = (FILE *)b->ptr;
}
break;
case BIO_CTRL_GET_CLOSE:
ret = (long)b->shutdown;
break;
case BIO_CTRL_SET_CLOSE:
b->shutdown = (int)num;
break;
case BIO_CTRL_FLUSH:
ret = 0 == fflush((FILE *)b->ptr);
break;
case BIO_CTRL_WPENDING:
case BIO_CTRL_PENDING:
default:
ret = 0;
break;
}
return ret;
}
static int file_gets(BIO *bp, char *buf, int size) {
int ret = 0;
if (size == 0) {
return 0;
}
if (!fgets(buf, size, (FILE *)bp->ptr)) {
buf[0] = 0;
goto err;
}
ret = strlen(buf);
err:
return ret;
}
static const BIO_METHOD methods_filep = {
BIO_TYPE_FILE, "FILE pointer",
file_write, file_read,
NULL /* puts */, file_gets,
file_ctrl, file_new,
file_free, NULL /* callback_ctrl */,
};
const BIO_METHOD *BIO_s_file(void) { return &methods_filep; }
int BIO_get_fp(BIO *bio, FILE **out_file) {
return BIO_ctrl(bio, BIO_C_GET_FILE_PTR, 0, (char*) out_file);
}
int BIO_set_fp(BIO *bio, FILE *file, int close_flag) {
return BIO_ctrl(bio, BIO_C_SET_FILE_PTR, close_flag, (char *) file);
}
int BIO_read_filename(BIO *bio, const char *filename) {
return BIO_ctrl(bio, BIO_C_SET_FILENAME, BIO_CLOSE | BIO_FP_READ,
(char *)filename);
}
int BIO_write_filename(BIO *bio, const char *filename) {
return BIO_ctrl(bio, BIO_C_SET_FILENAME, BIO_CLOSE | BIO_FP_WRITE,
(char *)filename);
}
int BIO_append_filename(BIO *bio, const char *filename) {
return BIO_ctrl(bio, BIO_C_SET_FILENAME, BIO_CLOSE | BIO_FP_APPEND,
(char *)filename);
}
int BIO_rw_filename(BIO *bio, const char *filename) {
return BIO_ctrl(bio, BIO_C_SET_FILENAME,
BIO_CLOSE | BIO_FP_READ | BIO_FP_WRITE, (char *)filename);
}
#endif // OPENSSL_TRUSTY


@@ -0,0 +1,470 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <CBigNumBoringSSL_bio.h>
#include <CBigNumBoringSSL_bytestring.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include "../fipsmodule/bn/internal.h"
int BN_bn2cbb_padded(CBB *out, size_t len, const BIGNUM *in) {
uint8_t *ptr;
return CBB_add_space(out, &ptr, len) && BN_bn2bin_padded(ptr, len, in);
}
static const char hextable[] = "0123456789abcdef";
char *BN_bn2hex(const BIGNUM *bn) {
int width = bn_minimal_width(bn);
char *buf = OPENSSL_malloc(1 /* leading '-' */ + 1 /* zero is non-empty */ +
width * BN_BYTES * 2 + 1 /* trailing NUL */);
if (buf == NULL) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
return NULL;
}
char *p = buf;
if (bn->neg) {
*(p++) = '-';
}
if (BN_is_zero(bn)) {
*(p++) = '0';
}
int z = 0;
for (int i = width - 1; i >= 0; i--) {
for (int j = BN_BITS2 - 8; j >= 0; j -= 8) {
// strip leading zeros
int v = ((int)(bn->d[i] >> (long)j)) & 0xff;
if (z || v != 0) {
*(p++) = hextable[v >> 4];
*(p++) = hextable[v & 0x0f];
z = 1;
}
}
}
*p = '\0';
return buf;
}
// decode_hex decodes |in_len| bytes of hex data from |in| and updates |bn|.
static int decode_hex(BIGNUM *bn, const char *in, int in_len) {
if (in_len > INT_MAX/4) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
return 0;
}
// |in_len| is the number of hex digits.
if (!bn_expand(bn, in_len * 4)) {
return 0;
}
int i = 0;
while (in_len > 0) {
// Decode one |BN_ULONG| at a time.
int todo = BN_BYTES * 2;
if (todo > in_len) {
todo = in_len;
}
BN_ULONG word = 0;
int j;
for (j = todo; j > 0; j--) {
char c = in[in_len - j];
BN_ULONG hex;
if (c >= '0' && c <= '9') {
hex = c - '0';
} else if (c >= 'a' && c <= 'f') {
hex = c - 'a' + 10;
} else if (c >= 'A' && c <= 'F') {
hex = c - 'A' + 10;
} else {
hex = 0;
// This shouldn't happen. The caller checks |isxdigit|.
assert(0);
}
word = (word << 4) | hex;
}
bn->d[i++] = word;
in_len -= todo;
}
assert(i <= bn->dmax);
bn->width = i;
return 1;
}
// decode_dec decodes |in_len| bytes of decimal data from |in| and updates |bn|.
static int decode_dec(BIGNUM *bn, const char *in, int in_len) {
int i, j;
BN_ULONG l = 0;
// Decode |BN_DEC_NUM| digits at a time.
j = BN_DEC_NUM - (in_len % BN_DEC_NUM);
if (j == BN_DEC_NUM) {
j = 0;
}
l = 0;
for (i = 0; i < in_len; i++) {
l *= 10;
l += in[i] - '0';
if (++j == BN_DEC_NUM) {
if (!BN_mul_word(bn, BN_DEC_CONV) ||
!BN_add_word(bn, l)) {
return 0;
}
l = 0;
j = 0;
}
}
return 1;
}
typedef int (*decode_func) (BIGNUM *bn, const char *in, int in_len);
typedef int (*char_test_func) (int c);
static int bn_x2bn(BIGNUM **outp, const char *in, decode_func decode, char_test_func want_char) {
BIGNUM *ret = NULL;
int neg = 0, i;
int num;
if (in == NULL || *in == 0) {
return 0;
}
if (*in == '-') {
neg = 1;
in++;
}
for (i = 0; want_char((unsigned char)in[i]) && i + neg < INT_MAX; i++) {}
num = i + neg;
if (outp == NULL) {
return num;
}
// in is the start of the hex digits, and it is 'i' long
if (*outp == NULL) {
ret = BN_new();
if (ret == NULL) {
return 0;
}
} else {
ret = *outp;
BN_zero(ret);
}
if (!decode(ret, in, i)) {
goto err;
}
bn_set_minimal_width(ret);
if (!BN_is_zero(ret)) {
ret->neg = neg;
}
*outp = ret;
return num;
err:
if (*outp == NULL) {
BN_free(ret);
}
return 0;
}
int BN_hex2bn(BIGNUM **outp, const char *in) {
return bn_x2bn(outp, in, decode_hex, isxdigit);
}
char *BN_bn2dec(const BIGNUM *a) {
// It is easier to print strings little-endian, so we assemble it in reverse
// and fix at the end.
BIGNUM *copy = NULL;
CBB cbb;
if (!CBB_init(&cbb, 16) ||
!CBB_add_u8(&cbb, 0 /* trailing NUL */)) {
goto cbb_err;
}
if (BN_is_zero(a)) {
if (!CBB_add_u8(&cbb, '0')) {
goto cbb_err;
}
} else {
copy = BN_dup(a);
if (copy == NULL) {
goto err;
}
while (!BN_is_zero(copy)) {
BN_ULONG word = BN_div_word(copy, BN_DEC_CONV);
if (word == (BN_ULONG)-1) {
goto err;
}
const int add_leading_zeros = !BN_is_zero(copy);
for (int i = 0; i < BN_DEC_NUM && (add_leading_zeros || word != 0); i++) {
if (!CBB_add_u8(&cbb, '0' + word % 10)) {
goto cbb_err;
}
word /= 10;
}
assert(word == 0);
}
}
if (BN_is_negative(a) &&
!CBB_add_u8(&cbb, '-')) {
goto cbb_err;
}
uint8_t *data;
size_t len;
if (!CBB_finish(&cbb, &data, &len)) {
goto cbb_err;
}
// Reverse the buffer.
for (size_t i = 0; i < len/2; i++) {
uint8_t tmp = data[i];
data[i] = data[len - 1 - i];
data[len - 1 - i] = tmp;
}
BN_free(copy);
return (char *)data;
cbb_err:
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
err:
BN_free(copy);
CBB_cleanup(&cbb);
return NULL;
}
int BN_dec2bn(BIGNUM **outp, const char *in) {
return bn_x2bn(outp, in, decode_dec, isdigit);
}
int BN_asc2bn(BIGNUM **outp, const char *in) {
const char *const orig_in = in;
if (*in == '-') {
in++;
}
if (in[0] == '0' && (in[1] == 'X' || in[1] == 'x')) {
if (!BN_hex2bn(outp, in+2)) {
return 0;
}
} else {
if (!BN_dec2bn(outp, in)) {
return 0;
}
}
if (*orig_in == '-' && !BN_is_zero(*outp)) {
(*outp)->neg = 1;
}
return 1;
}
int BN_print(BIO *bp, const BIGNUM *a) {
int i, j, v, z = 0;
int ret = 0;
if (a->neg && BIO_write(bp, "-", 1) != 1) {
goto end;
}
if (BN_is_zero(a) && BIO_write(bp, "0", 1) != 1) {
goto end;
}
for (i = bn_minimal_width(a) - 1; i >= 0; i--) {
for (j = BN_BITS2 - 4; j >= 0; j -= 4) {
// strip leading zeros
v = ((int)(a->d[i] >> (long)j)) & 0x0f;
if (z || v != 0) {
if (BIO_write(bp, &hextable[v], 1) != 1) {
goto end;
}
z = 1;
}
}
}
ret = 1;
end:
return ret;
}
int BN_print_fp(FILE *fp, const BIGNUM *a) {
BIO *b = BIO_new_fp(fp, BIO_NOCLOSE);
if (b == NULL) {
return 0;
}
int ret = BN_print(b, a);
BIO_free(b);
return ret;
}
size_t BN_bn2mpi(const BIGNUM *in, uint8_t *out) {
const size_t bits = BN_num_bits(in);
const size_t bytes = (bits + 7) / 8;
// If the number of bits is a multiple of 8, i.e. if the MSB is set,
// prefix with a zero byte.
int extend = 0;
if (bytes != 0 && (bits & 0x07) == 0) {
extend = 1;
}
const size_t len = bytes + extend;
if (len < bytes ||
4 + len < len ||
(len & 0xffffffff) != len) {
// If we cannot represent the number then we emit zero as the interface
// doesn't allow an error to be signalled.
if (out) {
OPENSSL_memset(out, 0, 4);
}
return 4;
}
if (out == NULL) {
return 4 + len;
}
out[0] = len >> 24;
out[1] = len >> 16;
out[2] = len >> 8;
out[3] = len;
if (extend) {
out[4] = 0;
}
BN_bn2bin(in, out + 4 + extend);
if (in->neg && len > 0) {
out[4] |= 0x80;
}
return len + 4;
}
BIGNUM *BN_mpi2bn(const uint8_t *in, size_t len, BIGNUM *out) {
if (len < 4) {
OPENSSL_PUT_ERROR(BN, BN_R_BAD_ENCODING);
return NULL;
}
const size_t in_len = ((size_t)in[0] << 24) |
((size_t)in[1] << 16) |
((size_t)in[2] << 8) |
((size_t)in[3]);
if (in_len != len - 4) {
OPENSSL_PUT_ERROR(BN, BN_R_BAD_ENCODING);
return NULL;
}
int out_is_alloced = 0;
if (out == NULL) {
out = BN_new();
if (out == NULL) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
return NULL;
}
out_is_alloced = 1;
}
if (in_len == 0) {
BN_zero(out);
return out;
}
in += 4;
if (BN_bin2bn(in, in_len, out) == NULL) {
if (out_is_alloced) {
BN_free(out);
}
return NULL;
}
out->neg = ((*in) & 0x80) != 0;
if (out->neg) {
BN_clear_bit(out, BN_num_bits(out) - 1);
}
return out;
}
int BN_bn2binpad(const BIGNUM *in, uint8_t *out, int len) {
if (len < 0 ||
!BN_bn2bin_padded(out, (size_t)len, in)) {
return -1;
}
return len;
}


@@ -0,0 +1,52 @@
/* Copyright (c) 2016, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_bytestring.h>
#include <assert.h>
#include <limits.h>
#include <string.h>
#include <CBigNumBoringSSL_mem.h>
#include "internal.h"
#include "../internal.h"
int CBB_finish_i2d(CBB *cbb, uint8_t **outp) {
assert(cbb->base->can_resize);
uint8_t *der;
size_t der_len;
if (!CBB_finish(cbb, &der, &der_len)) {
CBB_cleanup(cbb);
return -1;
}
if (der_len > INT_MAX) {
OPENSSL_free(der);
return -1;
}
if (outp != NULL) {
if (*outp == NULL) {
*outp = der;
der = NULL;
} else {
OPENSSL_memcpy(*outp, der, der_len);
*outp += der_len;
}
}
OPENSSL_free(der);
return (int)der_len;
}


@@ -0,0 +1,265 @@
/* Copyright (c) 2014, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_bytestring.h>
#include <assert.h>
#include <string.h>
#include "internal.h"
#include "../internal.h"
// kMaxDepth is a just a sanity limit. The code should be such that the length
// of the input being processes always decreases. None the less, a very large
// input could otherwise cause the stack to overflow.
static const unsigned kMaxDepth = 2048;
// is_string_type returns one if |tag| is a string type and zero otherwise. It
// ignores the constructed bit.
static int is_string_type(unsigned tag) {
switch (tag & ~CBS_ASN1_CONSTRUCTED) {
case CBS_ASN1_BITSTRING:
case CBS_ASN1_OCTETSTRING:
case CBS_ASN1_UTF8STRING:
case CBS_ASN1_NUMERICSTRING:
case CBS_ASN1_PRINTABLESTRING:
case CBS_ASN1_T61STRING:
case CBS_ASN1_VIDEOTEXSTRING:
case CBS_ASN1_IA5STRING:
case CBS_ASN1_GRAPHICSTRING:
case CBS_ASN1_VISIBLESTRING:
case CBS_ASN1_GENERALSTRING:
case CBS_ASN1_UNIVERSALSTRING:
case CBS_ASN1_BMPSTRING:
return 1;
default:
return 0;
}
}
// cbs_find_ber walks an ASN.1 structure in |orig_in| and sets |*ber_found|
// depending on whether an indefinite length element or constructed string was
// found. The value of |orig_in| is not changed. It returns one on success (i.e.
// |*ber_found| was set) and zero on error.
static int cbs_find_ber(const CBS *orig_in, char *ber_found, unsigned depth) {
CBS in;
if (depth > kMaxDepth) {
return 0;
}
CBS_init(&in, CBS_data(orig_in), CBS_len(orig_in));
*ber_found = 0;
while (CBS_len(&in) > 0) {
CBS contents;
unsigned tag;
size_t header_len;
if (!CBS_get_any_ber_asn1_element(&in, &contents, &tag, &header_len)) {
return 0;
}
if (CBS_len(&contents) == header_len &&
header_len > 0 &&
CBS_data(&contents)[header_len-1] == 0x80) {
// Found an indefinite-length element.
*ber_found = 1;
return 1;
}
if (tag & CBS_ASN1_CONSTRUCTED) {
if (is_string_type(tag)) {
// Constructed strings are only legal in BER and require conversion.
*ber_found = 1;
return 1;
}
if (!CBS_skip(&contents, header_len) ||
!cbs_find_ber(&contents, ber_found, depth + 1)) {
return 0;
}
}
}
return 1;
}
// is_eoc returns true if |header_len| and |contents|, as returned by
// |CBS_get_any_ber_asn1_element|, indicate an "end of contents" (EOC) value.
static char is_eoc(size_t header_len, CBS *contents) {
return header_len == 2 && CBS_len(contents) == 2 &&
OPENSSL_memcmp(CBS_data(contents), "\x00\x00", 2) == 0;
}
// cbs_convert_ber reads BER data from |in| and writes DER data to |out|. If
// |string_tag| is non-zero, then all elements must match |string_tag| up to the
// constructed bit and primitive element bodies are written to |out| without
// element headers. This is used when concatenating the fragments of a
// constructed string. If |looking_for_eoc| is set then any EOC elements found
// will cause the function to return after consuming it. It returns one on
// success and zero on error.
static int cbs_convert_ber(CBS *in, CBB *out, unsigned string_tag,
char looking_for_eoc, unsigned depth) {
assert(!(string_tag & CBS_ASN1_CONSTRUCTED));
if (depth > kMaxDepth) {
return 0;
}
while (CBS_len(in) > 0) {
CBS contents;
unsigned tag, child_string_tag = string_tag;
size_t header_len;
CBB *out_contents, out_contents_storage;
if (!CBS_get_any_ber_asn1_element(in, &contents, &tag, &header_len)) {
return 0;
}
if (is_eoc(header_len, &contents)) {
return looking_for_eoc;
}
if (string_tag != 0) {
// This is part of a constructed string. All elements must match
// |string_tag| up to the constructed bit and get appended to |out|
// without a child element.
if ((tag & ~CBS_ASN1_CONSTRUCTED) != string_tag) {
return 0;
}
out_contents = out;
} else {
unsigned out_tag = tag;
if ((tag & CBS_ASN1_CONSTRUCTED) && is_string_type(tag)) {
// If a constructed string, clear the constructed bit and inform
// children to concatenate bodies.
out_tag &= ~CBS_ASN1_CONSTRUCTED;
child_string_tag = out_tag;
}
if (!CBB_add_asn1(out, &out_contents_storage, out_tag)) {
return 0;
}
out_contents = &out_contents_storage;
}
if (CBS_len(&contents) == header_len && header_len > 0 &&
CBS_data(&contents)[header_len - 1] == 0x80) {
// This is an indefinite length element.
if (!cbs_convert_ber(in, out_contents, child_string_tag,
1 /* looking for eoc */, depth + 1) ||
!CBB_flush(out)) {
return 0;
}
continue;
}
if (!CBS_skip(&contents, header_len)) {
return 0;
}
if (tag & CBS_ASN1_CONSTRUCTED) {
// Recurse into children.
if (!cbs_convert_ber(&contents, out_contents, child_string_tag,
0 /* not looking for eoc */, depth + 1)) {
return 0;
}
} else {
// Copy primitive contents as-is.
if (!CBB_add_bytes(out_contents, CBS_data(&contents),
CBS_len(&contents))) {
return 0;
}
}
if (!CBB_flush(out)) {
return 0;
}
}
return looking_for_eoc == 0;
}
int CBS_asn1_ber_to_der(CBS *in, CBS *out, uint8_t **out_storage) {
CBB cbb;
// First, do a quick walk to find any indefinite-length elements. Most of the
// time we hope that there aren't any and thus we can quickly return.
char conversion_needed;
if (!cbs_find_ber(in, &conversion_needed, 0)) {
return 0;
}
if (!conversion_needed) {
if (!CBS_get_any_asn1_element(in, out, NULL, NULL)) {
return 0;
}
*out_storage = NULL;
return 1;
}
size_t len;
if (!CBB_init(&cbb, CBS_len(in)) ||
!cbs_convert_ber(in, &cbb, 0, 0, 0) ||
!CBB_finish(&cbb, out_storage, &len)) {
CBB_cleanup(&cbb);
return 0;
}
CBS_init(out, *out_storage, len);
return 1;
}
int CBS_get_asn1_implicit_string(CBS *in, CBS *out, uint8_t **out_storage,
unsigned outer_tag, unsigned inner_tag) {
assert(!(outer_tag & CBS_ASN1_CONSTRUCTED));
assert(!(inner_tag & CBS_ASN1_CONSTRUCTED));
assert(is_string_type(inner_tag));
if (CBS_peek_asn1_tag(in, outer_tag)) {
// Normal implicitly-tagged string.
*out_storage = NULL;
return CBS_get_asn1(in, out, outer_tag);
}
// Otherwise, try to parse an implicitly-tagged constructed string.
// |CBS_asn1_ber_to_der| is assumed to have run, so only allow one level deep
// of nesting.
CBB result;
CBS child;
if (!CBB_init(&result, CBS_len(in)) ||
!CBS_get_asn1(in, &child, outer_tag | CBS_ASN1_CONSTRUCTED)) {
goto err;
}
while (CBS_len(&child) > 0) {
CBS chunk;
if (!CBS_get_asn1(&child, &chunk, inner_tag) ||
!CBB_add_bytes(&result, CBS_data(&chunk), CBS_len(&chunk))) {
goto err;
}
}
uint8_t *data;
size_t len;
if (!CBB_finish(&result, &data, &len)) {
goto err;
}
CBS_init(out, data, len);
*out_storage = data;
return 1;
err:
CBB_cleanup(&result);
return 0;
}
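// Illustrative usage sketch (not part of the upstream BoringSSL sources),
// assuming the extra <CBigNumBoringSSL_mem.h> include below for
// |OPENSSL_free|: an indefinite-length, constructed OCTET STRING is
// normalized to DER by |CBS_asn1_ber_to_der| and then parsed as usual.
#include <CBigNumBoringSSL_mem.h>

static int example_ber_octet_string_to_der(void) {
  // BER: constructed OCTET STRING (0x24), indefinite length (0x80), two
  // primitive fragments "he" and "llo", terminated by an EOC (00 00).
  static const uint8_t kBer[] = {0x24, 0x80, 0x04, 0x02, 'h', 'e',
                                 0x04, 0x03, 'l', 'l', 'o', 0x00, 0x00};
  CBS in, der, contents;
  uint8_t *storage = NULL;
  CBS_init(&in, kBer, sizeof(kBer));
  if (!CBS_asn1_ber_to_der(&in, &der, &storage)) {
    return 0;
  }
  // After conversion the element reads as plain DER: 04 05 "hello".
  int ok = CBS_get_asn1(&der, &contents, CBS_ASN1_OCTETSTRING) &&
           CBS_len(&contents) == 5 &&
           OPENSSL_memcmp(CBS_data(&contents), "hello", 5) == 0;
  OPENSSL_free(storage);  // non-NULL only because a conversion was needed
  return ok;
}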

View File

@@ -0,0 +1,719 @@
/* Copyright (c) 2014, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_bytestring.h>
#include <assert.h>
#include <limits.h>
#include <string.h>
#include <CBigNumBoringSSL_mem.h>
#include "../internal.h"
void CBB_zero(CBB *cbb) {
OPENSSL_memset(cbb, 0, sizeof(CBB));
}
static int cbb_init(CBB *cbb, uint8_t *buf, size_t cap) {
// This assumes that |cbb| has already been zeroed.
struct cbb_buffer_st *base;
base = OPENSSL_malloc(sizeof(struct cbb_buffer_st));
if (base == NULL) {
return 0;
}
base->buf = buf;
base->len = 0;
base->cap = cap;
base->can_resize = 1;
base->error = 0;
cbb->base = base;
cbb->is_child = 0;
return 1;
}
int CBB_init(CBB *cbb, size_t initial_capacity) {
CBB_zero(cbb);
uint8_t *buf = OPENSSL_malloc(initial_capacity);
if (initial_capacity > 0 && buf == NULL) {
return 0;
}
if (!cbb_init(cbb, buf, initial_capacity)) {
OPENSSL_free(buf);
return 0;
}
return 1;
}
int CBB_init_fixed(CBB *cbb, uint8_t *buf, size_t len) {
CBB_zero(cbb);
if (!cbb_init(cbb, buf, len)) {
return 0;
}
cbb->base->can_resize = 0;
return 1;
}
void CBB_cleanup(CBB *cbb) {
// Child |CBB|s are non-owning. They are implicitly discarded and should not
// be used with |CBB_cleanup| or |ScopedCBB|.
assert(!cbb->is_child);
if (cbb->is_child) {
return;
}
if (cbb->base) {
if (cbb->base->can_resize) {
OPENSSL_free(cbb->base->buf);
}
OPENSSL_free(cbb->base);
}
cbb->base = NULL;
}
static int cbb_buffer_reserve(struct cbb_buffer_st *base, uint8_t **out,
size_t len) {
size_t newlen;
if (base == NULL) {
return 0;
}
newlen = base->len + len;
if (newlen < base->len) {
// Overflow
goto err;
}
if (newlen > base->cap) {
size_t newcap = base->cap * 2;
uint8_t *newbuf;
if (!base->can_resize) {
goto err;
}
if (newcap < base->cap || newcap < newlen) {
newcap = newlen;
}
newbuf = OPENSSL_realloc(base->buf, newcap);
if (newbuf == NULL) {
goto err;
}
base->buf = newbuf;
base->cap = newcap;
}
if (out) {
*out = base->buf + base->len;
}
return 1;
err:
base->error = 1;
return 0;
}
static int cbb_buffer_add(struct cbb_buffer_st *base, uint8_t **out,
size_t len) {
if (!cbb_buffer_reserve(base, out, len)) {
return 0;
}
// This will not overflow or |cbb_buffer_reserve| would have failed.
base->len += len;
return 1;
}
static int cbb_buffer_add_u(struct cbb_buffer_st *base, uint64_t v,
size_t len_len) {
if (len_len == 0) {
return 1;
}
uint8_t *buf;
if (!cbb_buffer_add(base, &buf, len_len)) {
return 0;
}
for (size_t i = len_len - 1; i < len_len; i--) {
buf[i] = v;
v >>= 8;
}
if (v != 0) {
base->error = 1;
return 0;
}
return 1;
}
int CBB_finish(CBB *cbb, uint8_t **out_data, size_t *out_len) {
if (cbb->is_child) {
return 0;
}
if (!CBB_flush(cbb)) {
return 0;
}
if (cbb->base->can_resize && (out_data == NULL || out_len == NULL)) {
// |out_data| and |out_len| can only be NULL if the CBB is fixed.
return 0;
}
if (out_data != NULL) {
*out_data = cbb->base->buf;
}
if (out_len != NULL) {
*out_len = cbb->base->len;
}
cbb->base->buf = NULL;
CBB_cleanup(cbb);
return 1;
}
// CBB_flush recurses and then writes out any pending length prefix. The
// current length of the underlying base is taken to be the length of the
// length-prefixed data.
int CBB_flush(CBB *cbb) {
size_t child_start, i, len;
// If |cbb->base| has hit an error, the buffer is in an undefined state, so
// fail all following calls. In particular, |cbb->child| may point to invalid
// memory.
if (cbb->base == NULL || cbb->base->error) {
return 0;
}
if (cbb->child == NULL || cbb->child->pending_len_len == 0) {
return 1;
}
child_start = cbb->child->offset + cbb->child->pending_len_len;
if (!CBB_flush(cbb->child) ||
child_start < cbb->child->offset ||
cbb->base->len < child_start) {
goto err;
}
len = cbb->base->len - child_start;
if (cbb->child->pending_is_asn1) {
// For ASN.1 we assume that we'll only need a single byte for the length.
// If that turned out to be incorrect, we have to move the contents along
// in order to make space.
uint8_t len_len;
uint8_t initial_length_byte;
assert (cbb->child->pending_len_len == 1);
if (len > 0xfffffffe) {
// Too large.
goto err;
} else if (len > 0xffffff) {
len_len = 5;
initial_length_byte = 0x80 | 4;
} else if (len > 0xffff) {
len_len = 4;
initial_length_byte = 0x80 | 3;
} else if (len > 0xff) {
len_len = 3;
initial_length_byte = 0x80 | 2;
} else if (len > 0x7f) {
len_len = 2;
initial_length_byte = 0x80 | 1;
} else {
len_len = 1;
initial_length_byte = (uint8_t)len;
len = 0;
}
if (len_len != 1) {
// We need to move the contents along in order to make space.
size_t extra_bytes = len_len - 1;
if (!cbb_buffer_add(cbb->base, NULL, extra_bytes)) {
goto err;
}
OPENSSL_memmove(cbb->base->buf + child_start + extra_bytes,
cbb->base->buf + child_start, len);
}
cbb->base->buf[cbb->child->offset++] = initial_length_byte;
cbb->child->pending_len_len = len_len - 1;
}
for (i = cbb->child->pending_len_len - 1; i < cbb->child->pending_len_len;
i--) {
cbb->base->buf[cbb->child->offset + i] = (uint8_t)len;
len >>= 8;
}
if (len != 0) {
goto err;
}
cbb->child->base = NULL;
cbb->child = NULL;
return 1;
err:
cbb->base->error = 1;
return 0;
}
const uint8_t *CBB_data(const CBB *cbb) {
assert(cbb->child == NULL);
return cbb->base->buf + cbb->offset + cbb->pending_len_len;
}
size_t CBB_len(const CBB *cbb) {
assert(cbb->child == NULL);
assert(cbb->offset + cbb->pending_len_len <= cbb->base->len);
return cbb->base->len - cbb->offset - cbb->pending_len_len;
}
static int cbb_add_length_prefixed(CBB *cbb, CBB *out_contents,
uint8_t len_len) {
uint8_t *prefix_bytes;
if (!CBB_flush(cbb)) {
return 0;
}
size_t offset = cbb->base->len;
if (!cbb_buffer_add(cbb->base, &prefix_bytes, len_len)) {
return 0;
}
OPENSSL_memset(prefix_bytes, 0, len_len);
OPENSSL_memset(out_contents, 0, sizeof(CBB));
out_contents->base = cbb->base;
out_contents->is_child = 1;
cbb->child = out_contents;
cbb->child->offset = offset;
cbb->child->pending_len_len = len_len;
cbb->child->pending_is_asn1 = 0;
return 1;
}
int CBB_add_u8_length_prefixed(CBB *cbb, CBB *out_contents) {
return cbb_add_length_prefixed(cbb, out_contents, 1);
}
int CBB_add_u16_length_prefixed(CBB *cbb, CBB *out_contents) {
return cbb_add_length_prefixed(cbb, out_contents, 2);
}
int CBB_add_u24_length_prefixed(CBB *cbb, CBB *out_contents) {
return cbb_add_length_prefixed(cbb, out_contents, 3);
}
// add_base128_integer encodes |v| as a big-endian base-128 integer where the
// high bit of each byte indicates where there is more data. This is the
// encoding used in DER for both high tag number form and OID components.
static int add_base128_integer(CBB *cbb, uint64_t v) {
unsigned len_len = 0;
uint64_t copy = v;
while (copy > 0) {
len_len++;
copy >>= 7;
}
if (len_len == 0) {
len_len = 1; // Zero is encoded with one byte.
}
for (unsigned i = len_len - 1; i < len_len; i--) {
uint8_t byte = (v >> (7 * i)) & 0x7f;
if (i != 0) {
// The high bit denotes whether there is more data.
byte |= 0x80;
}
if (!CBB_add_u8(cbb, byte)) {
return 0;
}
}
return 1;
}
int CBB_add_asn1(CBB *cbb, CBB *out_contents, unsigned tag) {
if (!CBB_flush(cbb)) {
return 0;
}
// Split the tag into leading bits and tag number.
uint8_t tag_bits = (tag >> CBS_ASN1_TAG_SHIFT) & 0xe0;
unsigned tag_number = tag & CBS_ASN1_TAG_NUMBER_MASK;
if (tag_number >= 0x1f) {
// Set all the bits in the tag number to signal high tag number form.
if (!CBB_add_u8(cbb, tag_bits | 0x1f) ||
!add_base128_integer(cbb, tag_number)) {
return 0;
}
} else if (!CBB_add_u8(cbb, tag_bits | tag_number)) {
return 0;
}
size_t offset = cbb->base->len;
if (!CBB_add_u8(cbb, 0)) {
return 0;
}
OPENSSL_memset(out_contents, 0, sizeof(CBB));
out_contents->base = cbb->base;
out_contents->is_child = 1;
cbb->child = out_contents;
cbb->child->offset = offset;
cbb->child->pending_len_len = 1;
cbb->child->pending_is_asn1 = 1;
return 1;
}
int CBB_add_bytes(CBB *cbb, const uint8_t *data, size_t len) {
uint8_t *dest;
if (!CBB_flush(cbb) ||
!cbb_buffer_add(cbb->base, &dest, len)) {
return 0;
}
OPENSSL_memcpy(dest, data, len);
return 1;
}
int CBB_add_space(CBB *cbb, uint8_t **out_data, size_t len) {
if (!CBB_flush(cbb) ||
!cbb_buffer_add(cbb->base, out_data, len)) {
return 0;
}
return 1;
}
int CBB_reserve(CBB *cbb, uint8_t **out_data, size_t len) {
if (!CBB_flush(cbb) ||
!cbb_buffer_reserve(cbb->base, out_data, len)) {
return 0;
}
return 1;
}
int CBB_did_write(CBB *cbb, size_t len) {
size_t newlen = cbb->base->len + len;
if (cbb->child != NULL ||
newlen < cbb->base->len ||
newlen > cbb->base->cap) {
return 0;
}
cbb->base->len = newlen;
return 1;
}
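// Illustrative sketch (not part of the upstream file): |CBB_reserve| and
// |CBB_did_write| let a caller write into the buffer directly and commit
// only the bytes it actually produced, which can be fewer than reserved.
static int example_reserve_then_commit(CBB *cbb) {
  uint8_t *out;
  if (!CBB_reserve(cbb, &out, 4)) {  // reserve an upper bound of four bytes
    return 0;
  }
  size_t used = 0;
  out[used++] = 0xde;
  out[used++] = 0xad;                // only two of the reserved bytes used
  return CBB_did_write(cbb, used);   // commit exactly |used| bytes
}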
int CBB_add_u8(CBB *cbb, uint8_t value) {
if (!CBB_flush(cbb)) {
return 0;
}
return cbb_buffer_add_u(cbb->base, value, 1);
}
int CBB_add_u16(CBB *cbb, uint16_t value) {
if (!CBB_flush(cbb)) {
return 0;
}
return cbb_buffer_add_u(cbb->base, value, 2);
}
int CBB_add_u16le(CBB *cbb, uint16_t value) {
return CBB_add_u16(cbb, CRYPTO_bswap2(value));
}
int CBB_add_u24(CBB *cbb, uint32_t value) {
if (!CBB_flush(cbb)) {
return 0;
}
return cbb_buffer_add_u(cbb->base, value, 3);
}
int CBB_add_u32(CBB *cbb, uint32_t value) {
if (!CBB_flush(cbb)) {
return 0;
}
return cbb_buffer_add_u(cbb->base, value, 4);
}
int CBB_add_u32le(CBB *cbb, uint32_t value) {
return CBB_add_u32(cbb, CRYPTO_bswap4(value));
}
int CBB_add_u64(CBB *cbb, uint64_t value) {
if (!CBB_flush(cbb)) {
return 0;
}
return cbb_buffer_add_u(cbb->base, value, 8);
}
int CBB_add_u64le(CBB *cbb, uint64_t value) {
return CBB_add_u64(cbb, CRYPTO_bswap8(value));
}
void CBB_discard_child(CBB *cbb) {
if (cbb->child == NULL) {
return;
}
cbb->base->len = cbb->child->offset;
cbb->child->base = NULL;
cbb->child = NULL;
}
int CBB_add_asn1_uint64(CBB *cbb, uint64_t value) {
CBB child;
int started = 0;
if (!CBB_add_asn1(cbb, &child, CBS_ASN1_INTEGER)) {
return 0;
}
for (size_t i = 0; i < 8; i++) {
uint8_t byte = (value >> 8*(7-i)) & 0xff;
if (!started) {
if (byte == 0) {
// Don't encode leading zeros.
continue;
}
// If the high bit is set, add a padding byte to make it
// unsigned.
if ((byte & 0x80) && !CBB_add_u8(&child, 0)) {
return 0;
}
started = 1;
}
if (!CBB_add_u8(&child, byte)) {
return 0;
}
}
// 0 is encoded as a single 0, not the empty string.
if (!started && !CBB_add_u8(&child, 0)) {
return 0;
}
return CBB_flush(cbb);
}
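// Illustrative sketch (not part of the upstream file), assuming the tag
// constants from the vendored CBigNumBoringSSL_bytestring.h header: a small
// DER SEQUENCE built with this API. The one-byte length placeholder written
// by |CBB_add_asn1| is fixed up by |CBB_flush| once the contents are known.
static int example_build_sequence(uint8_t **out, size_t *out_len) {
  CBB cbb, seq;
  if (!CBB_init(&cbb, 16) ||
      !CBB_add_asn1(&cbb, &seq, CBS_ASN1_SEQUENCE) ||
      !CBB_add_asn1_uint64(&seq, 1) ||    // 02 01 01
      !CBB_add_asn1_uint64(&seq, 128) ||  // 02 02 00 80 (sign padding byte)
      !CBB_finish(&cbb, out, out_len)) {  // 30 07 02 01 01 02 02 00 80
    CBB_cleanup(&cbb);
    return 0;
  }
  return 1;  // the caller releases |*out| with OPENSSL_free
}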
int CBB_add_asn1_int64(CBB *cbb, int64_t value) {
if (value >= 0) {
return CBB_add_asn1_uint64(cbb, value);
}
union {
int64_t i;
uint8_t bytes[sizeof(int64_t)];
} u;
u.i = value;
int start = 7;
// Skip leading sign-extension bytes unless they are necessary.
while (start > 0 && (u.bytes[start] == 0xff && (u.bytes[start - 1] & 0x80))) {
start--;
}
CBB child;
if (!CBB_add_asn1(cbb, &child, CBS_ASN1_INTEGER)) {
return 0;
}
for (int i = start; i >= 0; i--) {
if (!CBB_add_u8(&child, u.bytes[i])) {
return 0;
}
}
return CBB_flush(cbb);
}
int CBB_add_asn1_octet_string(CBB *cbb, const uint8_t *data, size_t data_len) {
CBB child;
if (!CBB_add_asn1(cbb, &child, CBS_ASN1_OCTETSTRING) ||
!CBB_add_bytes(&child, data, data_len) ||
!CBB_flush(cbb)) {
return 0;
}
return 1;
}
int CBB_add_asn1_bool(CBB *cbb, int value) {
CBB child;
if (!CBB_add_asn1(cbb, &child, CBS_ASN1_BOOLEAN) ||
!CBB_add_u8(&child, value != 0 ? 0xff : 0) ||
!CBB_flush(cbb)) {
return 0;
}
return 1;
}
// parse_dotted_decimal parses one decimal component from |cbs|, where |cbs| is
// an OID literal, e.g., "1.2.840.113554.4.1.72585". It consumes both the
// component and the dot, so |cbs| may be passed into the function again for the
// next value.
static int parse_dotted_decimal(CBS *cbs, uint64_t *out) {
*out = 0;
int seen_digit = 0;
for (;;) {
// Valid terminators for a component are the end of the string or a
// non-terminal dot. If the string ends with a dot, this is not a valid OID
// string.
uint8_t u;
if (!CBS_get_u8(cbs, &u) ||
(u == '.' && CBS_len(cbs) > 0)) {
break;
}
if (u < '0' || u > '9' ||
// Forbid stray leading zeros.
(seen_digit && *out == 0) ||
// Check for overflow.
*out > UINT64_MAX / 10 ||
*out * 10 > UINT64_MAX - (u - '0')) {
return 0;
}
*out = *out * 10 + (u - '0');
seen_digit = 1;
}
// The empty string is not a legal OID component.
return seen_digit;
}
int CBB_add_asn1_oid_from_text(CBB *cbb, const char *text, size_t len) {
if (!CBB_flush(cbb)) {
return 0;
}
CBS cbs;
CBS_init(&cbs, (const uint8_t *)text, len);
// OIDs must have at least two components.
uint64_t a, b;
if (!parse_dotted_decimal(&cbs, &a) ||
!parse_dotted_decimal(&cbs, &b)) {
return 0;
}
// The first component is encoded as 40 * |a| + |b|. This assumes that |a| is
// 0, 1, or 2 and that, when it is 0 or 1, |b| is at most 39.
if (a > 2 ||
(a < 2 && b > 39) ||
b > UINT64_MAX - 80 ||
!add_base128_integer(cbb, 40u * a + b)) {
return 0;
}
// The remaining components are encoded unmodified.
while (CBS_len(&cbs) > 0) {
if (!parse_dotted_decimal(&cbs, &a) ||
!add_base128_integer(cbb, a)) {
return 0;
}
}
return 1;
}
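// Illustrative sketch (not part of the upstream file), assuming
// CBS_ASN1_OBJECT from the vendored bytestring header: the first two OID
// components collapse into a single base-128 value (40 * a + b), so
// "1.2.840.113549" is encoded as 06 06 2a 86 48 86 f7 0d.
static int example_encode_oid(uint8_t **out, size_t *out_len) {
  static const char kOid[] = "1.2.840.113549";
  CBB cbb, obj;
  if (!CBB_init(&cbb, 16) ||
      !CBB_add_asn1(&cbb, &obj, CBS_ASN1_OBJECT) ||
      !CBB_add_asn1_oid_from_text(&obj, kOid, strlen(kOid)) ||
      !CBB_finish(&cbb, out, out_len)) {
    CBB_cleanup(&cbb);
    return 0;
  }
  return 1;
}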
static int compare_set_of_element(const void *a_ptr, const void *b_ptr) {
// See X.690, section 11.6 for the ordering. They are sorted in ascending
// order by their DER encoding.
const CBS *a = a_ptr, *b = b_ptr;
size_t a_len = CBS_len(a), b_len = CBS_len(b);
size_t min_len = a_len < b_len ? a_len : b_len;
int ret = OPENSSL_memcmp(CBS_data(a), CBS_data(b), min_len);
if (ret != 0) {
return ret;
}
if (a_len == b_len) {
return 0;
}
// If one is a prefix of the other, the shorter one sorts first. (This is not
// actually reachable. No DER encoding is a prefix of another DER encoding.)
return a_len < b_len ? -1 : 1;
}
int CBB_flush_asn1_set_of(CBB *cbb) {
if (!CBB_flush(cbb)) {
return 0;
}
CBS cbs;
size_t num_children = 0;
CBS_init(&cbs, CBB_data(cbb), CBB_len(cbb));
while (CBS_len(&cbs) != 0) {
if (!CBS_get_any_asn1_element(&cbs, NULL, NULL, NULL)) {
return 0;
}
num_children++;
}
if (num_children < 2) {
return 1; // Nothing to do. This is the common case for X.509.
}
if (num_children > ((size_t)-1) / sizeof(CBS)) {
return 0; // Overflow.
}
// Parse out the children and sort. We alias them into a copy of the buffer
// so they remain valid as we rewrite |cbb|.
int ret = 0;
size_t buf_len = CBB_len(cbb);
uint8_t *buf = OPENSSL_memdup(CBB_data(cbb), buf_len);
CBS *children = OPENSSL_malloc(num_children * sizeof(CBS));
if (buf == NULL || children == NULL) {
goto err;
}
CBS_init(&cbs, buf, buf_len);
for (size_t i = 0; i < num_children; i++) {
if (!CBS_get_any_asn1_element(&cbs, &children[i], NULL, NULL)) {
goto err;
}
}
qsort(children, num_children, sizeof(CBS), compare_set_of_element);
// Rewind |cbb| and write the contents back in the new order.
cbb->base->len = cbb->offset + cbb->pending_len_len;
for (size_t i = 0; i < num_children; i++) {
if (!CBB_add_bytes(cbb, CBS_data(&children[i]), CBS_len(&children[i]))) {
goto err;
}
}
assert(CBB_len(cbb) == buf_len);
ret = 1;
err:
OPENSSL_free(buf);
OPENSSL_free(children);
return ret;
}
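// Illustrative sketch (not part of the upstream file), assuming CBS_ASN1_SET
// from the vendored bytestring header: DER requires SET OF elements to be
// sorted by their encodings, so the children may be appended in any order
// and reordered here before the enclosing SET is flushed.
static int example_build_set_of(uint8_t **out, size_t *out_len) {
  CBB cbb, set;
  if (!CBB_init(&cbb, 16) ||
      !CBB_add_asn1(&cbb, &set, CBS_ASN1_SET) ||
      !CBB_add_asn1_uint64(&set, 2) ||    // appended first...
      !CBB_add_asn1_uint64(&set, 1) ||    // ...but must sort after this one
      !CBB_flush_asn1_set_of(&set) ||     // yields 31 06 02 01 01 02 01 02
      !CBB_finish(&cbb, out, out_len)) {
    CBB_cleanup(&cbb);
    return 0;
  }
  return 1;
}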

View File

@@ -0,0 +1,688 @@
/* Copyright (c) 2014, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_bytestring.h>
#include <assert.h>
#include <inttypes.h>
#include <string.h>
#include "internal.h"
#include "../internal.h"
void CBS_init(CBS *cbs, const uint8_t *data, size_t len) {
cbs->data = data;
cbs->len = len;
}
static int cbs_get(CBS *cbs, const uint8_t **p, size_t n) {
if (cbs->len < n) {
return 0;
}
*p = cbs->data;
cbs->data += n;
cbs->len -= n;
return 1;
}
int CBS_skip(CBS *cbs, size_t len) {
const uint8_t *dummy;
return cbs_get(cbs, &dummy, len);
}
const uint8_t *CBS_data(const CBS *cbs) {
return cbs->data;
}
size_t CBS_len(const CBS *cbs) {
return cbs->len;
}
int CBS_stow(const CBS *cbs, uint8_t **out_ptr, size_t *out_len) {
OPENSSL_free(*out_ptr);
*out_ptr = NULL;
*out_len = 0;
if (cbs->len == 0) {
return 1;
}
*out_ptr = OPENSSL_memdup(cbs->data, cbs->len);
if (*out_ptr == NULL) {
return 0;
}
*out_len = cbs->len;
return 1;
}
int CBS_strdup(const CBS *cbs, char **out_ptr) {
if (*out_ptr != NULL) {
OPENSSL_free(*out_ptr);
}
*out_ptr = OPENSSL_strndup((const char*)cbs->data, cbs->len);
return (*out_ptr != NULL);
}
int CBS_contains_zero_byte(const CBS *cbs) {
return OPENSSL_memchr(cbs->data, 0, cbs->len) != NULL;
}
int CBS_mem_equal(const CBS *cbs, const uint8_t *data, size_t len) {
if (len != cbs->len) {
return 0;
}
return CRYPTO_memcmp(cbs->data, data, len) == 0;
}
static int cbs_get_u(CBS *cbs, uint64_t *out, size_t len) {
uint64_t result = 0;
const uint8_t *data;
if (!cbs_get(cbs, &data, len)) {
return 0;
}
for (size_t i = 0; i < len; i++) {
result <<= 8;
result |= data[i];
}
*out = result;
return 1;
}
int CBS_get_u8(CBS *cbs, uint8_t *out) {
const uint8_t *v;
if (!cbs_get(cbs, &v, 1)) {
return 0;
}
*out = *v;
return 1;
}
int CBS_get_u16(CBS *cbs, uint16_t *out) {
uint64_t v;
if (!cbs_get_u(cbs, &v, 2)) {
return 0;
}
*out = v;
return 1;
}
int CBS_get_u16le(CBS *cbs, uint16_t *out) {
if (!CBS_get_u16(cbs, out)) {
return 0;
}
*out = CRYPTO_bswap2(*out);
return 1;
}
int CBS_get_u24(CBS *cbs, uint32_t *out) {
uint64_t v;
if (!cbs_get_u(cbs, &v, 3)) {
return 0;
}
*out = v;
return 1;
}
int CBS_get_u32(CBS *cbs, uint32_t *out) {
uint64_t v;
if (!cbs_get_u(cbs, &v, 4)) {
return 0;
}
*out = v;
return 1;
}
int CBS_get_u32le(CBS *cbs, uint32_t *out) {
if (!CBS_get_u32(cbs, out)) {
return 0;
}
*out = CRYPTO_bswap4(*out);
return 1;
}
int CBS_get_u64(CBS *cbs, uint64_t *out) {
return cbs_get_u(cbs, out, 8);
}
int CBS_get_u64le(CBS *cbs, uint64_t *out) {
if (!cbs_get_u(cbs, out, 8)) {
return 0;
}
*out = CRYPTO_bswap8(*out);
return 1;
}
int CBS_get_last_u8(CBS *cbs, uint8_t *out) {
if (cbs->len == 0) {
return 0;
}
*out = cbs->data[cbs->len - 1];
cbs->len--;
return 1;
}
int CBS_get_bytes(CBS *cbs, CBS *out, size_t len) {
const uint8_t *v;
if (!cbs_get(cbs, &v, len)) {
return 0;
}
CBS_init(out, v, len);
return 1;
}
int CBS_copy_bytes(CBS *cbs, uint8_t *out, size_t len) {
const uint8_t *v;
if (!cbs_get(cbs, &v, len)) {
return 0;
}
OPENSSL_memcpy(out, v, len);
return 1;
}
static int cbs_get_length_prefixed(CBS *cbs, CBS *out, size_t len_len) {
uint64_t len;
if (!cbs_get_u(cbs, &len, len_len)) {
return 0;
}
// If |len_len| <= 3 then we know that |len| will fit into a |size_t|, even on
// 32-bit systems.
assert(len_len <= 3);
return CBS_get_bytes(cbs, out, len);
}
int CBS_get_u8_length_prefixed(CBS *cbs, CBS *out) {
return cbs_get_length_prefixed(cbs, out, 1);
}
int CBS_get_u16_length_prefixed(CBS *cbs, CBS *out) {
return cbs_get_length_prefixed(cbs, out, 2);
}
int CBS_get_u24_length_prefixed(CBS *cbs, CBS *out) {
return cbs_get_length_prefixed(cbs, out, 3);
}
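// Illustrative sketch (not part of the upstream file): the length-prefixed
// getters parse the TLS-style pattern of a big-endian length followed by
// exactly that many payload bytes.
static int example_parse_u16_vector(void) {
  static const uint8_t kMsg[] = {0x00, 0x03, 'a', 'b', 'c'};
  CBS cbs, body;
  CBS_init(&cbs, kMsg, sizeof(kMsg));
  return CBS_get_u16_length_prefixed(&cbs, &body) &&
         CBS_len(&body) == 3 &&   // "abc"
         CBS_len(&cbs) == 0;      // prefix and payload were both consumed
}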
// parse_base128_integer reads a big-endian base-128 integer from |cbs| and sets
// |*out| to the result. This is the encoding used in DER for both high tag
// number form and OID components.
static int parse_base128_integer(CBS *cbs, uint64_t *out) {
uint64_t v = 0;
uint8_t b;
do {
if (!CBS_get_u8(cbs, &b)) {
return 0;
}
if ((v >> (64 - 7)) != 0) {
// The value is too large.
return 0;
}
if (v == 0 && b == 0x80) {
// The value must be minimally encoded.
return 0;
}
v = (v << 7) | (b & 0x7f);
// Values end at an octet with the high bit cleared.
} while (b & 0x80);
*out = v;
return 1;
}
static int parse_asn1_tag(CBS *cbs, unsigned *out) {
uint8_t tag_byte;
if (!CBS_get_u8(cbs, &tag_byte)) {
return 0;
}
// ITU-T X.690 section 8.1.2.3 specifies the format for identifiers with a tag
// number no greater than 30.
//
// If the number portion is 31 (0x1f, the largest value that fits in the
// allotted bits), then the tag is more than one byte long and the
// continuation bytes contain the tag number. This parser only supports tag
// numbers less than 31 (and thus single-byte tags).
unsigned tag = ((unsigned)tag_byte & 0xe0) << CBS_ASN1_TAG_SHIFT;
unsigned tag_number = tag_byte & 0x1f;
if (tag_number == 0x1f) {
uint64_t v;
if (!parse_base128_integer(cbs, &v) ||
// Check the tag number is within our supported bounds.
v > CBS_ASN1_TAG_NUMBER_MASK ||
// Small tag numbers should have used low tag number form.
v < 0x1f) {
return 0;
}
tag_number = (unsigned)v;
}
tag |= tag_number;
*out = tag;
return 1;
}
static int cbs_get_any_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
size_t *out_header_len, int ber_ok) {
CBS header = *cbs;
CBS throwaway;
if (out == NULL) {
out = &throwaway;
}
unsigned tag;
if (!parse_asn1_tag(&header, &tag)) {
return 0;
}
if (out_tag != NULL) {
*out_tag = tag;
}
uint8_t length_byte;
if (!CBS_get_u8(&header, &length_byte)) {
return 0;
}
size_t header_len = CBS_len(cbs) - CBS_len(&header);
size_t len;
// The format for the length encoding is specified in ITU-T X.690 section
// 8.1.3.
if ((length_byte & 0x80) == 0) {
// Short form length.
len = ((size_t) length_byte) + header_len;
if (out_header_len != NULL) {
*out_header_len = header_len;
}
} else {
// The high bit indicates that this is the long form, while the next 7 bits
// encode the number of subsequent octets used to encode the length (ITU-T
// X.690 clause 8.1.3.5.b).
const size_t num_bytes = length_byte & 0x7f;
uint64_t len64;
if (ber_ok && (tag & CBS_ASN1_CONSTRUCTED) != 0 && num_bytes == 0) {
// indefinite length
if (out_header_len != NULL) {
*out_header_len = header_len;
}
return CBS_get_bytes(cbs, out, header_len);
}
// ITU-T X.690 clause 8.1.3.5.c specifies that the value 0xff shall not be
// used as the first byte of the length. If this parser encounters that
// value, num_bytes will be parsed as 127, which will fail the check below.
if (num_bytes == 0 || num_bytes > 4) {
return 0;
}
if (!cbs_get_u(&header, &len64, num_bytes)) {
return 0;
}
// ITU-T X.690 section 10.1 (DER length forms) requires encoding the length
// with the minimum number of octets.
if (len64 < 128) {
// Length should have used short-form encoding.
return 0;
}
if ((len64 >> ((num_bytes-1)*8)) == 0) {
// Length should have been at least one byte shorter.
return 0;
}
len = len64;
if (len + header_len + num_bytes < len) {
// Overflow.
return 0;
}
len += header_len + num_bytes;
if (out_header_len != NULL) {
*out_header_len = header_len + num_bytes;
}
}
return CBS_get_bytes(cbs, out, len);
}
int CBS_get_any_asn1(CBS *cbs, CBS *out, unsigned *out_tag) {
size_t header_len;
if (!CBS_get_any_asn1_element(cbs, out, out_tag, &header_len)) {
return 0;
}
if (!CBS_skip(out, header_len)) {
assert(0);
return 0;
}
return 1;
}
int CBS_get_any_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
size_t *out_header_len) {
return cbs_get_any_asn1_element(cbs, out, out_tag, out_header_len,
0 /* DER only */);
}
int CBS_get_any_ber_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
size_t *out_header_len) {
return cbs_get_any_asn1_element(cbs, out, out_tag, out_header_len,
1 /* BER allowed */);
}
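// Illustrative sketch (not part of the upstream file): a 130-byte OCTET
// STRING cannot use short-form length, so its header is tag 04, then 81 82
// (one length octet holding 130), giving a three-byte header and an element
// that spans the whole buffer.
static int example_long_form_length(void) {
  uint8_t buf[3 + 130] = {0x04, 0x81, 0x82};  // remaining bytes stay zero
  CBS cbs, elem;
  unsigned tag;
  size_t header_len;
  CBS_init(&cbs, buf, sizeof(buf));
  return CBS_get_any_asn1_element(&cbs, &elem, &tag, &header_len) &&
         tag == CBS_ASN1_OCTETSTRING &&
         header_len == 3 &&
         CBS_len(&elem) == sizeof(buf);
}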
static int cbs_get_asn1(CBS *cbs, CBS *out, unsigned tag_value,
int skip_header) {
size_t header_len;
unsigned tag;
CBS throwaway;
if (out == NULL) {
out = &throwaway;
}
if (!CBS_get_any_asn1_element(cbs, out, &tag, &header_len) ||
tag != tag_value) {
return 0;
}
if (skip_header && !CBS_skip(out, header_len)) {
assert(0);
return 0;
}
return 1;
}
int CBS_get_asn1(CBS *cbs, CBS *out, unsigned tag_value) {
return cbs_get_asn1(cbs, out, tag_value, 1 /* skip header */);
}
int CBS_get_asn1_element(CBS *cbs, CBS *out, unsigned tag_value) {
return cbs_get_asn1(cbs, out, tag_value, 0 /* include header */);
}
int CBS_peek_asn1_tag(const CBS *cbs, unsigned tag_value) {
if (CBS_len(cbs) < 1) {
return 0;
}
CBS copy = *cbs;
unsigned actual_tag;
return parse_asn1_tag(&copy, &actual_tag) && tag_value == actual_tag;
}
int CBS_get_asn1_uint64(CBS *cbs, uint64_t *out) {
CBS bytes;
if (!CBS_get_asn1(cbs, &bytes, CBS_ASN1_INTEGER)) {
return 0;
}
*out = 0;
const uint8_t *data = CBS_data(&bytes);
size_t len = CBS_len(&bytes);
if (len == 0) {
// An INTEGER is encoded with at least one octet.
return 0;
}
if ((data[0] & 0x80) != 0) {
// Negative number.
return 0;
}
if (data[0] == 0 && len > 1 && (data[1] & 0x80) == 0) {
// Extra leading zeros.
return 0;
}
for (size_t i = 0; i < len; i++) {
if ((*out >> 56) != 0) {
// Too large to represent as a uint64_t.
return 0;
}
*out <<= 8;
*out |= data[i];
}
return 1;
}
int CBS_get_asn1_int64(CBS *cbs, int64_t *out) {
CBS bytes;
if (!CBS_get_asn1(cbs, &bytes, CBS_ASN1_INTEGER)) {
return 0;
}
const uint8_t *data = CBS_data(&bytes);
const size_t len = CBS_len(&bytes);
if (len == 0 || len > sizeof(int64_t)) {
// An INTEGER is encoded with at least one octet.
return 0;
}
if (len > 1) {
if (data[0] == 0 && (data[1] & 0x80) == 0) {
return 0; // Extra leading zeros.
}
if (data[0] == 0xff && (data[1] & 0x80) != 0) {
return 0; // Extra leading 0xff.
}
}
union {
int64_t i;
uint8_t bytes[sizeof(int64_t)];
} u;
const int is_negative = (data[0] & 0x80);
memset(u.bytes, is_negative ? 0xff : 0, sizeof(u.bytes)); // Sign-extend.
for (size_t i = 0; i < len; i++) {
u.bytes[i] = data[len - i - 1];
}
*out = u.i;
return 1;
}
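// Illustrative sketch (not part of the upstream file): INTEGER contents are
// big-endian two's complement, so 02 01 85 decodes to -123 via the signed
// parser, while 02 02 00 ff needs a leading zero to keep 255 non-negative.
static int example_parse_integers(void) {
  static const uint8_t kNeg[] = {0x02, 0x01, 0x85};           // -123
  static const uint8_t kPadded[] = {0x02, 0x02, 0x00, 0xff};  // 255
  CBS cbs;
  int64_t v;
  uint64_t u;
  CBS_init(&cbs, kNeg, sizeof(kNeg));
  if (!CBS_get_asn1_int64(&cbs, &v) || v != -123) {
    return 0;
  }
  CBS_init(&cbs, kPadded, sizeof(kPadded));
  return CBS_get_asn1_uint64(&cbs, &u) && u == 255;
}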
int CBS_get_asn1_bool(CBS *cbs, int *out) {
CBS bytes;
if (!CBS_get_asn1(cbs, &bytes, CBS_ASN1_BOOLEAN) ||
CBS_len(&bytes) != 1) {
return 0;
}
const uint8_t value = *CBS_data(&bytes);
if (value != 0 && value != 0xff) {
return 0;
}
*out = !!value;
return 1;
}
int CBS_get_optional_asn1(CBS *cbs, CBS *out, int *out_present, unsigned tag) {
int present = 0;
if (CBS_peek_asn1_tag(cbs, tag)) {
if (!CBS_get_asn1(cbs, out, tag)) {
return 0;
}
present = 1;
}
if (out_present != NULL) {
*out_present = present;
}
return 1;
}
int CBS_get_optional_asn1_octet_string(CBS *cbs, CBS *out, int *out_present,
unsigned tag) {
CBS child;
int present;
if (!CBS_get_optional_asn1(cbs, &child, &present, tag)) {
return 0;
}
if (present) {
assert(out);
if (!CBS_get_asn1(&child, out, CBS_ASN1_OCTETSTRING) ||
CBS_len(&child) != 0) {
return 0;
}
} else {
CBS_init(out, NULL, 0);
}
if (out_present) {
*out_present = present;
}
return 1;
}
int CBS_get_optional_asn1_uint64(CBS *cbs, uint64_t *out, unsigned tag,
uint64_t default_value) {
CBS child;
int present;
if (!CBS_get_optional_asn1(cbs, &child, &present, tag)) {
return 0;
}
if (present) {
if (!CBS_get_asn1_uint64(&child, out) ||
CBS_len(&child) != 0) {
return 0;
}
} else {
*out = default_value;
}
return 1;
}
int CBS_get_optional_asn1_bool(CBS *cbs, int *out, unsigned tag,
int default_value) {
CBS child, child2;
int present;
if (!CBS_get_optional_asn1(cbs, &child, &present, tag)) {
return 0;
}
if (present) {
uint8_t boolean;
if (!CBS_get_asn1(&child, &child2, CBS_ASN1_BOOLEAN) ||
CBS_len(&child2) != 1 ||
CBS_len(&child) != 0) {
return 0;
}
boolean = CBS_data(&child2)[0];
if (boolean == 0) {
*out = 0;
} else if (boolean == 0xff) {
*out = 1;
} else {
return 0;
}
} else {
*out = default_value;
}
return 1;
}
int CBS_is_valid_asn1_bitstring(const CBS *cbs) {
CBS in = *cbs;
uint8_t num_unused_bits;
if (!CBS_get_u8(&in, &num_unused_bits) ||
num_unused_bits > 7) {
return 0;
}
if (num_unused_bits == 0) {
return 1;
}
// All num_unused_bits bits must exist and be zeros.
uint8_t last;
if (!CBS_get_last_u8(&in, &last) ||
(last & ((1 << num_unused_bits) - 1)) != 0) {
return 0;
}
return 1;
}
int CBS_asn1_bitstring_has_bit(const CBS *cbs, unsigned bit) {
if (!CBS_is_valid_asn1_bitstring(cbs)) {
return 0;
}
const unsigned byte_num = (bit >> 3) + 1;
const unsigned bit_num = 7 - (bit & 7);
// Unused bits are zero, and this function does not distinguish between
// missing and unset bits. Thus it is sufficient to do a byte-level length
// check.
return byte_num < CBS_len(cbs) &&
(CBS_data(cbs)[byte_num] & (1 << bit_num)) != 0;
}
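// Illustrative sketch (not part of the upstream file): BIT STRING contents
// begin with the unused-bit count, and bit 0 is the most significant bit of
// the first content byte. {0x07, 0x80} therefore has only bit 0 set, with
// the remaining seven bits of the byte marked unused.
static int example_bitstring_bit0(void) {
  static const uint8_t kBits[] = {0x07, 0x80};
  CBS cbs;
  CBS_init(&cbs, kBits, sizeof(kBits));
  return CBS_asn1_bitstring_has_bit(&cbs, 0) &&
         !CBS_asn1_bitstring_has_bit(&cbs, 1);
}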
static int add_decimal(CBB *out, uint64_t v) {
char buf[DECIMAL_SIZE(uint64_t) + 1];
BIO_snprintf(buf, sizeof(buf), "%" PRIu64, v);
return CBB_add_bytes(out, (const uint8_t *)buf, strlen(buf));
}
char *CBS_asn1_oid_to_text(const CBS *cbs) {
CBB cbb;
if (!CBB_init(&cbb, 32)) {
goto err;
}
CBS copy = *cbs;
// The first component is 40 * value1 + value2, where value1 is 0, 1, or 2.
uint64_t v;
if (!parse_base128_integer(&copy, &v)) {
goto err;
}
if (v >= 80) {
if (!CBB_add_bytes(&cbb, (const uint8_t *)"2.", 2) ||
!add_decimal(&cbb, v - 80)) {
goto err;
}
} else if (!add_decimal(&cbb, v / 40) ||
!CBB_add_u8(&cbb, '.') ||
!add_decimal(&cbb, v % 40)) {
goto err;
}
while (CBS_len(&copy) != 0) {
if (!parse_base128_integer(&copy, &v) ||
!CBB_add_u8(&cbb, '.') ||
!add_decimal(&cbb, v)) {
goto err;
}
}
uint8_t *txt;
size_t txt_len;
if (!CBB_add_u8(&cbb, '\0') ||
!CBB_finish(&cbb, &txt, &txt_len)) {
goto err;
}
return (char *)txt;
err:
CBB_cleanup(&cbb);
return NULL;
}

View File

@@ -0,0 +1,96 @@
/* Copyright (c) 2014, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#ifndef OPENSSL_HEADER_BYTESTRING_INTERNAL_H
#define OPENSSL_HEADER_BYTESTRING_INTERNAL_H
#include <CBigNumBoringSSL_base.h>
#if defined(__cplusplus)
extern "C" {
#endif
// CBS_asn1_ber_to_der reads a BER element from |in|. If it finds
// indefinite-length elements or constructed strings then it converts the BER
// data to DER, sets |out| to the converted contents and |*out_storage| to a
// buffer which the caller must release with |OPENSSL_free|. Otherwise, it sets
// |out| to the original BER element in |in| and |*out_storage| to NULL.
// Additionally, |*in| will be advanced over the BER element.
//
// This function should successfully process any valid BER input, however it
// will not convert all of BER's deviations from DER. BER is ambiguous between
// implicitly-tagged SEQUENCEs of strings and implicitly-tagged constructed
// strings. Implicitly-tagged strings must be parsed with
// |CBS_get_ber_implicitly_tagged_string| instead of |CBS_get_asn1|. The caller
// must also account for BER variations in the contents of a primitive.
//
// It returns one on success and zero otherwise.
OPENSSL_EXPORT int CBS_asn1_ber_to_der(CBS *in, CBS *out,
uint8_t **out_storage);
// CBS_get_asn1_implicit_string parses a BER string of primitive type
// |inner_tag| implicitly-tagged with |outer_tag|. It sets |out| to the
// contents. If concatenation was needed, it sets |*out_storage| to a buffer
// which the caller must release with |OPENSSL_free|. Otherwise, it sets
// |*out_storage| to NULL.
//
// This function does not parse all of BER. It requires the string be
// definite-length. Constructed strings are allowed, but all children of the
// outermost element must be primitive. The caller should use
// |CBS_asn1_ber_to_der| before running this function.
//
// It returns one on success and zero otherwise.
OPENSSL_EXPORT int CBS_get_asn1_implicit_string(CBS *in, CBS *out,
uint8_t **out_storage,
unsigned outer_tag,
unsigned inner_tag);
// CBB_finish_i2d calls |CBB_finish| on |cbb| which must have been initialized
// with |CBB_init|. If |outp| is not NULL then the result is written to |*outp|
// and |*outp| is advanced just past the output. It returns the number of bytes
// in the result, whether written or not, or a negative value on error. On
// error, it calls |CBB_cleanup| on |cbb|.
//
// This function may be used to help implement legacy i2d ASN.1 functions.
int CBB_finish_i2d(CBB *cbb, uint8_t **outp);
// Unicode utilities.
// The following functions read one Unicode code point from |cbs| with the
// corresponding encoding and store it in |*out|. They return one on success and
// zero on error.
OPENSSL_EXPORT int cbs_get_utf8(CBS *cbs, uint32_t *out);
OPENSSL_EXPORT int cbs_get_latin1(CBS *cbs, uint32_t *out);
OPENSSL_EXPORT int cbs_get_ucs2_be(CBS *cbs, uint32_t *out);
OPENSSL_EXPORT int cbs_get_utf32_be(CBS *cbs, uint32_t *out);
// cbb_get_utf8_len returns the number of bytes needed to represent |u| in
// UTF-8.
OPENSSL_EXPORT size_t cbb_get_utf8_len(uint32_t u);
// The following functions encode |u| to |cbb| with the corresponding
// encoding. They return one on success and zero on error.
OPENSSL_EXPORT int cbb_add_utf8(CBB *cbb, uint32_t u);
OPENSSL_EXPORT int cbb_add_latin1(CBB *cbb, uint32_t u);
OPENSSL_EXPORT int cbb_add_ucs2_be(CBB *cbb, uint32_t u);
OPENSSL_EXPORT int cbb_add_utf32_be(CBB *cbb, uint32_t u);
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_BYTESTRING_INTERNAL_H

View File

@@ -0,0 +1,155 @@
/* Copyright (c) 2018, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_bytestring.h>
#include "internal.h"
static int is_valid_code_point(uint32_t v) {
// References in the following are to Unicode 9.0.0.
if (// The Unicode space runs from zero to 0x10ffff (3.4 D9).
v > 0x10ffff ||
// Values 0x...fffe, 0x...ffff, and 0xfdd0-0xfdef are permanently reserved
// (3.4 D14)
(v & 0xfffe) == 0xfffe ||
(v >= 0xfdd0 && v <= 0xfdef) ||
// Surrogate code points are invalid (3.2 C1).
(v >= 0xd800 && v <= 0xdfff)) {
return 0;
}
return 1;
}
// BOTTOM_BITS returns a byte with the bottom |n| bits set.
#define BOTTOM_BITS(n) (uint8_t)((1u << (n)) - 1)
// TOP_BITS returns a byte with the top |n| bits set.
#define TOP_BITS(n) ((uint8_t)~BOTTOM_BITS(8 - (n)))
int cbs_get_utf8(CBS *cbs, uint32_t *out) {
uint8_t c;
if (!CBS_get_u8(cbs, &c)) {
return 0;
}
if (c <= 0x7f) {
*out = c;
return 1;
}
uint32_t v, lower_bound;
size_t len;
if ((c & TOP_BITS(3)) == TOP_BITS(2)) {
v = c & BOTTOM_BITS(5);
len = 1;
lower_bound = 0x80;
} else if ((c & TOP_BITS(4)) == TOP_BITS(3)) {
v = c & BOTTOM_BITS(4);
len = 2;
lower_bound = 0x800;
} else if ((c & TOP_BITS(5)) == TOP_BITS(4)) {
v = c & BOTTOM_BITS(3);
len = 3;
lower_bound = 0x10000;
} else {
return 0;
}
for (size_t i = 0; i < len; i++) {
if (!CBS_get_u8(cbs, &c) ||
(c & TOP_BITS(2)) != TOP_BITS(1)) {
return 0;
}
v <<= 6;
v |= c & BOTTOM_BITS(6);
}
if (!is_valid_code_point(v) ||
v < lower_bound) {
return 0;
}
*out = v;
return 1;
}
int cbs_get_latin1(CBS *cbs, uint32_t *out) {
uint8_t c;
if (!CBS_get_u8(cbs, &c)) {
return 0;
}
*out = c;
return 1;
}
int cbs_get_ucs2_be(CBS *cbs, uint32_t *out) {
// Note UCS-2 (used by BMPString) does not support surrogates.
uint16_t c;
if (!CBS_get_u16(cbs, &c) ||
!is_valid_code_point(c)) {
return 0;
}
*out = c;
return 1;
}
int cbs_get_utf32_be(CBS *cbs, uint32_t *out) {
return CBS_get_u32(cbs, out) && is_valid_code_point(*out);
}
size_t cbb_get_utf8_len(uint32_t u) {
if (u <= 0x7f) {
return 1;
}
if (u <= 0x7ff) {
return 2;
}
if (u <= 0xffff) {
return 3;
}
return 4;
}
int cbb_add_utf8(CBB *cbb, uint32_t u) {
if (!is_valid_code_point(u)) {
return 0;
}
if (u <= 0x7f) {
return CBB_add_u8(cbb, (uint8_t)u);
}
if (u <= 0x7ff) {
return CBB_add_u8(cbb, TOP_BITS(2) | (u >> 6)) &&
CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6)));
}
if (u <= 0xffff) {
return CBB_add_u8(cbb, TOP_BITS(3) | (u >> 12)) &&
CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) &&
CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6)));
}
if (u <= 0x10ffff) {
return CBB_add_u8(cbb, TOP_BITS(4) | (u >> 18)) &&
CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 12) & BOTTOM_BITS(6))) &&
CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) &&
CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6)));
}
return 0;
}
int cbb_add_latin1(CBB *cbb, uint32_t u) {
return u <= 0xff && CBB_add_u8(cbb, (uint8_t)u);
}
int cbb_add_ucs2_be(CBB *cbb, uint32_t u) {
return u <= 0xffff && is_valid_code_point(u) && CBB_add_u16(cbb, (uint16_t)u);
}
int cbb_add_utf32_be(CBB *cbb, uint32_t u) {
return is_valid_code_point(u) && CBB_add_u32(cbb, u);
}
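// Illustrative round-trip sketch (not part of the upstream file), assuming
// the extra <CBigNumBoringSSL_mem.h> include below for |OPENSSL_free|:
// U+00E9 encodes to the two bytes C3 A9, which matches |cbb_get_utf8_len|,
// and decodes back to the same code point.
#include <CBigNumBoringSSL_mem.h>

static int example_utf8_round_trip(void) {
  CBB cbb;
  uint8_t *buf;
  size_t len;
  uint32_t decoded;
  if (!CBB_init(&cbb, 4) ||
      !cbb_add_utf8(&cbb, 0x00e9) ||
      !CBB_finish(&cbb, &buf, &len)) {
    CBB_cleanup(&cbb);
    return 0;
  }
  CBS cbs;
  CBS_init(&cbs, buf, len);
  int ok = len == cbb_get_utf8_len(0x00e9) &&  // two bytes: 0xc3 0xa9
           cbs_get_utf8(&cbs, &decoded) &&
           decoded == 0x00e9;
  OPENSSL_free(buf);
  return ok;
}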

View File

@@ -0,0 +1,55 @@
/* Copyright (c) 2018, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_cpu.h>
#if defined(OPENSSL_AARCH64) && defined(OPENSSL_FUCHSIA) && \
!defined(OPENSSL_STATIC_ARMCAP)
#include <zircon/features.h>
#include <zircon/syscalls.h>
#include <zircon/types.h>
#include <CBigNumBoringSSL_arm_arch.h>
#include "internal.h"
extern uint32_t OPENSSL_armcap_P;
void OPENSSL_cpuid_setup(void) {
uint32_t hwcap;
zx_status_t rc = zx_system_get_features(ZX_FEATURE_KIND_CPU, &hwcap);
if (rc != ZX_OK || (hwcap & ZX_ARM64_FEATURE_ISA_ASIMD) == 0) {
// Matching OpenSSL, if NEON/ASIMD is missing, don't report other features
// either.
return;
}
OPENSSL_armcap_P |= ARMV7_NEON;
if (hwcap & ZX_ARM64_FEATURE_ISA_AES) {
OPENSSL_armcap_P |= ARMV8_AES;
}
if (hwcap & ZX_ARM64_FEATURE_ISA_PMULL) {
OPENSSL_armcap_P |= ARMV8_PMULL;
}
if (hwcap & ZX_ARM64_FEATURE_ISA_SHA1) {
OPENSSL_armcap_P |= ARMV8_SHA1;
}
if (hwcap & ZX_ARM64_FEATURE_ISA_SHA2) {
OPENSSL_armcap_P |= ARMV8_SHA256;
}
}
#endif // OPENSSL_AARCH64 && OPENSSL_FUCHSIA && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,62 @@
/* Copyright (c) 2016, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_cpu.h>
#if defined(OPENSSL_AARCH64) && defined(OPENSSL_LINUX) && \
!defined(OPENSSL_STATIC_ARMCAP)
#include <sys/auxv.h>
#include <CBigNumBoringSSL_arm_arch.h>
#include "internal.h"
extern uint32_t OPENSSL_armcap_P;
void OPENSSL_cpuid_setup(void) {
unsigned long hwcap = getauxval(AT_HWCAP);
// See /usr/include/asm/hwcap.h on an aarch64 installation for the source of
// these values.
static const unsigned long kNEON = 1 << 1;
static const unsigned long kAES = 1 << 3;
static const unsigned long kPMULL = 1 << 4;
static const unsigned long kSHA1 = 1 << 5;
static const unsigned long kSHA256 = 1 << 6;
if ((hwcap & kNEON) == 0) {
// Matching OpenSSL, if NEON is missing, don't report other features
// either.
return;
}
OPENSSL_armcap_P |= ARMV7_NEON;
if (hwcap & kAES) {
OPENSSL_armcap_P |= ARMV8_AES;
}
if (hwcap & kPMULL) {
OPENSSL_armcap_P |= ARMV8_PMULL;
}
if (hwcap & kSHA1) {
OPENSSL_armcap_P |= ARMV8_SHA1;
}
if (hwcap & kSHA256) {
OPENSSL_armcap_P |= ARMV8_SHA256;
}
}
#endif // OPENSSL_AARCH64 && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,218 @@
/* Copyright (c) 2016, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_cpu.h>
#if defined(OPENSSL_ARM) && !defined(OPENSSL_STATIC_ARMCAP)
#include <errno.h>
#include <fcntl.h>
#include <sys/types.h>
#include <unistd.h>
#include <CBigNumBoringSSL_arm_arch.h>
#include <CBigNumBoringSSL_mem.h>
#include "cpu-arm-linux.h"
#define AT_HWCAP 16
#define AT_HWCAP2 26
// |getauxval| is not available on Android until API level 20. Link it as a weak
// symbol and use other methods as fallback.
unsigned long getauxval(unsigned long type) __attribute__((weak));
static int open_eintr(const char *path, int flags) {
int ret;
do {
ret = open(path, flags);
} while (ret < 0 && errno == EINTR);
return ret;
}
static ssize_t read_eintr(int fd, void *out, size_t len) {
ssize_t ret;
do {
ret = read(fd, out, len);
} while (ret < 0 && errno == EINTR);
return ret;
}
// read_full reads exactly |len| bytes from |fd| to |out|. On error or end of
// file, it returns zero.
static int read_full(int fd, void *out, size_t len) {
char *outp = out;
while (len > 0) {
ssize_t ret = read_eintr(fd, outp, len);
if (ret <= 0) {
return 0;
}
outp += ret;
len -= ret;
}
return 1;
}
// read_file opens |path| and reads until end-of-file. On success, it returns
// one and sets |*out_ptr| and |*out_len| to a newly-allocated buffer with the
// contents. Otherwise, it returns zero.
static int read_file(char **out_ptr, size_t *out_len, const char *path) {
int fd = open_eintr(path, O_RDONLY);
if (fd < 0) {
return 0;
}
static const size_t kReadSize = 1024;
int ret = 0;
size_t cap = kReadSize, len = 0;
char *buf = OPENSSL_malloc(cap);
if (buf == NULL) {
goto err;
}
for (;;) {
if (cap - len < kReadSize) {
size_t new_cap = cap * 2;
if (new_cap < cap) {
goto err;
}
char *new_buf = OPENSSL_realloc(buf, new_cap);
if (new_buf == NULL) {
goto err;
}
buf = new_buf;
cap = new_cap;
}
ssize_t bytes_read = read_eintr(fd, buf + len, kReadSize);
if (bytes_read < 0) {
goto err;
}
if (bytes_read == 0) {
break;
}
len += bytes_read;
}
*out_ptr = buf;
*out_len = len;
ret = 1;
buf = NULL;
err:
OPENSSL_free(buf);
close(fd);
return ret;
}
// getauxval_proc behaves like |getauxval| but reads from /proc/self/auxv.
static unsigned long getauxval_proc(unsigned long type) {
int fd = open_eintr("/proc/self/auxv", O_RDONLY);
if (fd < 0) {
return 0;
}
struct {
unsigned long tag;
unsigned long value;
} entry;
for (;;) {
if (!read_full(fd, &entry, sizeof(entry)) ||
(entry.tag == 0 && entry.value == 0)) {
break;
}
if (entry.tag == type) {
close(fd);
return entry.value;
}
}
close(fd);
return 0;
}
extern uint32_t OPENSSL_armcap_P;
static int g_has_broken_neon, g_needs_hwcap2_workaround;
void OPENSSL_cpuid_setup(void) {
char *cpuinfo_data;
size_t cpuinfo_len;
if (!read_file(&cpuinfo_data, &cpuinfo_len, "/proc/cpuinfo")) {
return;
}
STRING_PIECE cpuinfo;
cpuinfo.data = cpuinfo_data;
cpuinfo.len = cpuinfo_len;
// |getauxval| is not available on Android until API level 20. If it is
// unavailable, read from /proc/self/auxv as a fallback. This is unreadable
// on some versions of Android, so further fall back to /proc/cpuinfo.
//
// See
// https://android.googlesource.com/platform/ndk/+/882ac8f3392858991a0e1af33b4b7387ec856bd2
// and b/13679666 (Google-internal) for details.
unsigned long hwcap = 0;
if (getauxval != NULL) {
hwcap = getauxval(AT_HWCAP);
}
if (hwcap == 0) {
hwcap = getauxval_proc(AT_HWCAP);
}
if (hwcap == 0) {
hwcap = crypto_get_arm_hwcap_from_cpuinfo(&cpuinfo);
}
// Clear NEON support if known broken.
g_has_broken_neon = crypto_cpuinfo_has_broken_neon(&cpuinfo);
if (g_has_broken_neon) {
hwcap &= ~HWCAP_NEON;
}
// Matching OpenSSL, only report other features if NEON is present.
if (hwcap & HWCAP_NEON) {
OPENSSL_armcap_P |= ARMV7_NEON;
// Some ARMv8 Android devices don't expose AT_HWCAP2. Fall back to
// /proc/cpuinfo. See https://crbug.com/596156.
unsigned long hwcap2 = 0;
if (getauxval != NULL) {
hwcap2 = getauxval(AT_HWCAP2);
}
if (hwcap2 == 0) {
hwcap2 = crypto_get_arm_hwcap2_from_cpuinfo(&cpuinfo);
g_needs_hwcap2_workaround = hwcap2 != 0;
}
if (hwcap2 & HWCAP2_AES) {
OPENSSL_armcap_P |= ARMV8_AES;
}
if (hwcap2 & HWCAP2_PMULL) {
OPENSSL_armcap_P |= ARMV8_PMULL;
}
if (hwcap2 & HWCAP2_SHA1) {
OPENSSL_armcap_P |= ARMV8_SHA1;
}
if (hwcap2 & HWCAP2_SHA2) {
OPENSSL_armcap_P |= ARMV8_SHA256;
}
}
OPENSSL_free(cpuinfo_data);
}
int CRYPTO_has_broken_NEON(void) { return g_has_broken_neon; }
int CRYPTO_needs_hwcap2_workaround(void) { return g_needs_hwcap2_workaround; }
#endif // OPENSSL_ARM && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,201 @@
/* Copyright (c) 2018, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#ifndef OPENSSL_HEADER_CRYPTO_CPU_ARM_LINUX_H
#define OPENSSL_HEADER_CRYPTO_CPU_ARM_LINUX_H
#include <CBigNumBoringSSL_base.h>
#include <string.h>
#include "internal.h"
#if defined(__cplusplus)
extern "C" {
#endif
// The cpuinfo parser lives in a header file so it may be accessible from
// cross-platform fuzzers without adding code to those platforms normally.
#define HWCAP_NEON (1 << 12)
// See /usr/include/asm/hwcap.h on an ARM installation for the source of
// these values.
#define HWCAP2_AES (1 << 0)
#define HWCAP2_PMULL (1 << 1)
#define HWCAP2_SHA1 (1 << 2)
#define HWCAP2_SHA2 (1 << 3)
typedef struct {
const char *data;
size_t len;
} STRING_PIECE;
static int STRING_PIECE_equals(const STRING_PIECE *a, const char *b) {
size_t b_len = strlen(b);
return a->len == b_len && OPENSSL_memcmp(a->data, b, b_len) == 0;
}
// STRING_PIECE_split finds the first occurrence of |sep| in |in| and, if found,
// sets |*out_left| and |*out_right| to |in| split before and after it. It
// returns one if |sep| was found and zero otherwise.
static int STRING_PIECE_split(STRING_PIECE *out_left, STRING_PIECE *out_right,
const STRING_PIECE *in, char sep) {
const char *p = (const char *)OPENSSL_memchr(in->data, sep, in->len);
if (p == NULL) {
return 0;
}
// |out_left| or |out_right| may alias |in|, so make a copy.
STRING_PIECE in_copy = *in;
out_left->data = in_copy.data;
out_left->len = p - in_copy.data;
out_right->data = in_copy.data + out_left->len + 1;
out_right->len = in_copy.len - out_left->len - 1;
return 1;
}
// STRING_PIECE_get_delimited reads a |sep|-delimited entry from |s|, writing it
// to |out| and updating |s| to point beyond it. It returns one on success and
// zero if |s| is empty. If |s| has no copies of |sep| and is non-empty, it
// reads the entire string to |out|.
static int STRING_PIECE_get_delimited(STRING_PIECE *s, STRING_PIECE *out, char sep) {
if (s->len == 0) {
return 0;
}
if (!STRING_PIECE_split(out, s, s, sep)) {
// |s| had no instances of |sep|. Return the entire string.
*out = *s;
s->data += s->len;
s->len = 0;
}
return 1;
}
// STRING_PIECE_trim removes leading and trailing whitespace from |s|.
static void STRING_PIECE_trim(STRING_PIECE *s) {
while (s->len != 0 && (s->data[0] == ' ' || s->data[0] == '\t')) {
s->data++;
s->len--;
}
while (s->len != 0 &&
(s->data[s->len - 1] == ' ' || s->data[s->len - 1] == '\t')) {
s->len--;
}
}
// extract_cpuinfo_field extracts a /proc/cpuinfo field named |field| from
// |in|. If found, it sets |*out| to the value and returns one. Otherwise, it
// returns zero.
static int extract_cpuinfo_field(STRING_PIECE *out, const STRING_PIECE *in,
const char *field) {
// Process |in| one line at a time.
STRING_PIECE remaining = *in, line;
while (STRING_PIECE_get_delimited(&remaining, &line, '\n')) {
STRING_PIECE key, value;
if (!STRING_PIECE_split(&key, &value, &line, ':')) {
continue;
}
STRING_PIECE_trim(&key);
if (STRING_PIECE_equals(&key, field)) {
STRING_PIECE_trim(&value);
*out = value;
return 1;
}
}
return 0;
}
static int cpuinfo_field_equals(const STRING_PIECE *cpuinfo, const char *field,
const char *value) {
STRING_PIECE extracted;
return extract_cpuinfo_field(&extracted, cpuinfo, field) &&
STRING_PIECE_equals(&extracted, value);
}
// has_list_item treats |list| as a space-separated list of items and returns
// one if |item| is contained in |list| and zero otherwise.
static int has_list_item(const STRING_PIECE *list, const char *item) {
STRING_PIECE remaining = *list, feature;
while (STRING_PIECE_get_delimited(&remaining, &feature, ' ')) {
if (STRING_PIECE_equals(&feature, item)) {
return 1;
}
}
return 0;
}
// crypto_get_arm_hwcap_from_cpuinfo returns an equivalent ARM |AT_HWCAP| value
// from |cpuinfo|.
static unsigned long crypto_get_arm_hwcap_from_cpuinfo(
const STRING_PIECE *cpuinfo) {
if (cpuinfo_field_equals(cpuinfo, "CPU architecture", "8")) {
// This is a 32-bit ARM binary running on a 64-bit kernel. NEON is always
// available on ARMv8. Linux omits required features, so reading the
// "Features" line does not work. (For simplicity, use strict equality. We
// assume everything running on future ARM architectures will have a
// working |getauxval|.)
return HWCAP_NEON;
}
STRING_PIECE features;
if (extract_cpuinfo_field(&features, cpuinfo, "Features") &&
has_list_item(&features, "neon")) {
return HWCAP_NEON;
}
return 0;
}
// crypto_get_arm_hwcap2_from_cpuinfo returns an equivalent ARM |AT_HWCAP2|
// value from |cpuinfo|.
static unsigned long crypto_get_arm_hwcap2_from_cpuinfo(
const STRING_PIECE *cpuinfo) {
STRING_PIECE features;
if (!extract_cpuinfo_field(&features, cpuinfo, "Features")) {
return 0;
}
unsigned long ret = 0;
if (has_list_item(&features, "aes")) {
ret |= HWCAP2_AES;
}
if (has_list_item(&features, "pmull")) {
ret |= HWCAP2_PMULL;
}
if (has_list_item(&features, "sha1")) {
ret |= HWCAP2_SHA1;
}
if (has_list_item(&features, "sha2")) {
ret |= HWCAP2_SHA2;
}
return ret;
}
// crypto_cpuinfo_has_broken_neon returns one if |cpuinfo| matches a CPU known
// to have a broken NEON unit and zero otherwise. See https://crbug.com/341598.
static int crypto_cpuinfo_has_broken_neon(const STRING_PIECE *cpuinfo) {
return cpuinfo_field_equals(cpuinfo, "CPU implementer", "0x51") &&
cpuinfo_field_equals(cpuinfo, "CPU architecture", "7") &&
cpuinfo_field_equals(cpuinfo, "CPU variant", "0x1") &&
cpuinfo_field_equals(cpuinfo, "CPU part", "0x04d") &&
cpuinfo_field_equals(cpuinfo, "CPU revision", "0");
}
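// Illustrative sketch (not part of the upstream header): feeding a canned,
// hypothetical /proc/cpuinfo fragment through the parser above. The Features
// line advertises AES and SHA-2 but not PMULL, so the expected result is
// HWCAP2_AES | HWCAP2_SHA2.
static unsigned long example_parse_cpuinfo_hwcap2(void) {
  static const char kCpuinfo[] =
      "processor\t: 0\n"
      "Features\t: half thumb fastmult vfp edsp neon aes sha2\n"
      "CPU architecture: 7\n";
  STRING_PIECE cpuinfo;
  cpuinfo.data = kCpuinfo;
  cpuinfo.len = sizeof(kCpuinfo) - 1;
  return crypto_get_arm_hwcap2_from_cpuinfo(&cpuinfo);
}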
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_CRYPTO_CPU_ARM_LINUX_H

View File

@@ -0,0 +1,38 @@
/* Copyright (c) 2014, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_cpu.h>
#if (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) && \
!defined(OPENSSL_STATIC_ARMCAP)
#include <CBigNumBoringSSL_arm_arch.h>
extern uint32_t OPENSSL_armcap_P;
char CRYPTO_is_NEON_capable_at_runtime(void) {
return (OPENSSL_armcap_P & ARMV7_NEON) != 0;
}
int CRYPTO_is_ARMv8_AES_capable(void) {
return (OPENSSL_armcap_P & ARMV8_AES) != 0;
}
int CRYPTO_is_ARMv8_PMULL_capable(void) {
return (OPENSSL_armcap_P & ARMV8_PMULL) != 0;
}
#endif /* (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) &&
!defined(OPENSSL_STATIC_ARMCAP) */

View File

@@ -0,0 +1,291 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_cpu.h>
#if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_X86) || defined(OPENSSL_X86_64))
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if defined(_MSC_VER)
OPENSSL_MSVC_PRAGMA(warning(push, 3))
#include <immintrin.h>
#include <intrin.h>
OPENSSL_MSVC_PRAGMA(warning(pop))
#endif
#include "internal.h"
// OPENSSL_cpuid runs the cpuid instruction. |leaf| is passed in as EAX and ECX
// is set to zero. It writes EAX, EBX, ECX, and EDX to |*out_eax| through
// |*out_edx|.
static void OPENSSL_cpuid(uint32_t *out_eax, uint32_t *out_ebx,
uint32_t *out_ecx, uint32_t *out_edx, uint32_t leaf) {
#if defined(_MSC_VER)
int tmp[4];
__cpuid(tmp, (int)leaf);
*out_eax = (uint32_t)tmp[0];
*out_ebx = (uint32_t)tmp[1];
*out_ecx = (uint32_t)tmp[2];
*out_edx = (uint32_t)tmp[3];
#elif defined(__pic__) && defined(OPENSSL_32_BIT)
// Inline assembly may not clobber the PIC register. For 32-bit, this is EBX.
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602.
__asm__ volatile (
"xor %%ecx, %%ecx\n"
"mov %%ebx, %%edi\n"
"cpuid\n"
"xchg %%edi, %%ebx\n"
: "=a"(*out_eax), "=D"(*out_ebx), "=c"(*out_ecx), "=d"(*out_edx)
: "a"(leaf)
);
#else
__asm__ volatile (
"xor %%ecx, %%ecx\n"
"cpuid\n"
: "=a"(*out_eax), "=b"(*out_ebx), "=c"(*out_ecx), "=d"(*out_edx)
: "a"(leaf)
);
#endif
}
// OPENSSL_xgetbv returns the value of an Intel Extended Control Register (XCR).
// Currently only XCR0 is defined by Intel so |xcr| should always be zero.
static uint64_t OPENSSL_xgetbv(uint32_t xcr) {
#if defined(_MSC_VER)
return (uint64_t)_xgetbv(xcr);
#else
uint32_t eax, edx;
__asm__ volatile ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
return (((uint64_t)edx) << 32) | eax;
#endif
}
// handle_cpu_env applies the value from |in| to the CPUID values in |out[0]|
// and |out[1]|. See the comment in |OPENSSL_cpuid_setup| about this.
static void handle_cpu_env(uint32_t *out, const char *in) {
const int invert = in[0] == '~';
const int or = in[0] == '|';
const int skip_first_byte = invert || or;
const int hex = in[skip_first_byte] == '0' && in[skip_first_byte+1] == 'x';
int sscanf_result;
uint64_t v;
if (hex) {
sscanf_result = sscanf(in + invert + 2, "%" PRIx64, &v);
} else {
sscanf_result = sscanf(in + invert, "%" PRIu64, &v);
}
if (!sscanf_result) {
return;
}
if (invert) {
out[0] &= ~v;
out[1] &= ~(v >> 32);
} else if (or) {
out[0] |= v;
out[1] |= (v >> 32);
} else {
out[0] = v;
out[1] = v >> 32;
}
}
void OPENSSL_cpuid_setup(void) {
// Determine the vendor and maximum input value.
uint32_t eax, ebx, ecx, edx;
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0);
uint32_t num_ids = eax;
int is_intel = ebx == 0x756e6547 /* Genu */ &&
edx == 0x49656e69 /* ineI */ &&
ecx == 0x6c65746e /* ntel */;
int is_amd = ebx == 0x68747541 /* Auth */ &&
edx == 0x69746e65 /* enti */ &&
ecx == 0x444d4163 /* cAMD */;
uint32_t extended_features[2] = {0};
if (num_ids >= 7) {
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 7);
extended_features[0] = ebx;
extended_features[1] = ecx;
}
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1);
if (is_amd) {
// See https://www.amd.com/system/files/TechDocs/25481.pdf, page 10.
const uint32_t base_family = (eax >> 8) & 15;
const uint32_t base_model = (eax >> 4) & 15;
uint32_t family = base_family;
uint32_t model = base_model;
if (base_family == 0xf) {
const uint32_t ext_family = (eax >> 20) & 255;
family += ext_family;
const uint32_t ext_model = (eax >> 16) & 15;
model |= ext_model << 4;
}
if (family < 0x17 || (family == 0x17 && 0x70 <= model && model <= 0x7f)) {
// Disable RDRAND on AMD families before 0x17 (Zen) due to reported
// failures after suspend.
// https://bugzilla.redhat.com/show_bug.cgi?id=1150286
// Also disable for family 0x17, models 0x70-0x7f, due to possible RDRAND
// failures there too.
ecx &= ~(1u << 30);
}
}
// Force the hyper-threading bit so that the more conservative path is always
// chosen.
edx |= 1u << 28;
// Reserved bit #20 was historically repurposed to control the in-memory
// representation of RC4 state. Always set it to zero.
edx &= ~(1u << 20);
// Reserved bit #30 is repurposed to signal an Intel CPU.
if (is_intel) {
edx |= (1u << 30);
// Clear the XSAVE bit on Knights Landing to mimic Silvermont. This enables
// some Silvermont-specific codepaths which perform better. See OpenSSL
// commit 64d92d74985ebb3d0be58a9718f9e080a14a8e7f.
if ((eax & 0x0fff0ff0) == 0x00050670 /* Knights Landing */ ||
(eax & 0x0fff0ff0) == 0x00080650 /* Knights Mill (per SDE) */) {
ecx &= ~(1u << 26);
}
} else {
edx &= ~(1u << 30);
}
// The SDBG bit is repurposed to denote AMD XOP support. Don't ever use AMD
// XOP code paths.
ecx &= ~(1u << 11);
uint64_t xcr0 = 0;
if (ecx & (1u << 27)) {
// XCR0 may only be queried if the OSXSAVE bit is set.
xcr0 = OPENSSL_xgetbv(0);
}
// See Intel manual, volume 1, section 14.3.
if ((xcr0 & 6) != 6) {
// YMM registers cannot be used.
ecx &= ~(1u << 28); // AVX
ecx &= ~(1u << 12); // FMA
ecx &= ~(1u << 11); // AMD XOP
// Clear AVX2 and AVX512* bits.
//
// TODO(davidben): Should bits 17 and 26-28 also be cleared? Upstream
// doesn't clear those.
extended_features[0] &=
~((1u << 5) | (1u << 16) | (1u << 21) | (1u << 30) | (1u << 31));
}
// See Intel manual, volume 1, section 15.2.
if ((xcr0 & 0xe6) != 0xe6) {
// Clear AVX512F. Note we don't touch other AVX512 extensions because they
// can be used with YMM.
extended_features[0] &= ~(1u << 16);
}
// Disable ADX instructions on Knights Landing. See OpenSSL commit
// 64d92d74985ebb3d0be58a9718f9e080a14a8e7f.
if ((ecx & (1u << 26)) == 0) {
extended_features[0] &= ~(1u << 19);
}
OPENSSL_ia32cap_P[0] = edx;
OPENSSL_ia32cap_P[1] = ecx;
OPENSSL_ia32cap_P[2] = extended_features[0];
OPENSSL_ia32cap_P[3] = extended_features[1];
const char *env1, *env2;
env1 = getenv("OPENSSL_ia32cap");
if (env1 == NULL) {
return;
}
// OPENSSL_ia32cap can contain zero, one or two values, separated with a ':'.
// Each value is a 64-bit, unsigned value which may start with "0x" to
// indicate a hex value. Prior to the 64-bit value, a '~' or '|' may be given.
//
// If the '~' prefix is present:
// the value is inverted and ANDed with the probed CPUID result
// If the '|' prefix is present:
// the value is ORed with the probed CPUID result
// Otherwise:
// the value is taken as the result of the CPUID
//
// The first value determines OPENSSL_ia32cap_P[0] and [1]. The second [2]
// and [3].
handle_cpu_env(&OPENSSL_ia32cap_P[0], env1);
env2 = strchr(env1, ':');
if (env2 != NULL) {
handle_cpu_env(&OPENSSL_ia32cap_P[2], env2 + 1);
}
}
#endif // !OPENSSL_NO_ASM && (OPENSSL_X86 || OPENSSL_X86_64)
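/*
 * Illustrative sketch, not part of the vendored file: how the
 * OPENSSL_ia32cap override described above maps onto OPENSSL_ia32cap_P.
 * The first 64-bit value covers words [0] (EDX) and [1] (ECX), so the AVX
 * bit (ECX bit 28) is bit 60 of the combined value. Setting
 * OPENSSL_ia32cap=~0x1000000000000000 in the environment therefore clears
 * AVX after probing, equivalent to the masking below.
 */
static void example_clear_avx(uint32_t caps[4]) {
  uint64_t mask = (uint64_t)1 << 60;   // AVX in the combined [0]/[1] view
  caps[0] &= ~(uint32_t)mask;          // EDX word: mask's low half is zero
  caps[1] &= ~(uint32_t)(mask >> 32);  // ECX word: clears bit 28 (AVX)
}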

View File

@ -0,0 +1,38 @@
/* Copyright (c) 2016, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_cpu.h>
#if defined(OPENSSL_PPC64LE)
#include <sys/auxv.h>
#include "internal.h"
#if !defined(PPC_FEATURE2_HAS_VCRYPTO)
// PPC_FEATURE2_HAS_VCRYPTO was taken from section 4.1.2.3 of the “OpenPOWER
// ABI for Linux Supplement”.
#define PPC_FEATURE2_HAS_VCRYPTO 0x02000000
#endif
void OPENSSL_cpuid_setup(void) {
OPENSSL_ppc64le_hwcap2 = getauxval(AT_HWCAP2);
}
int CRYPTO_is_PPC64LE_vcrypto_capable(void) {
return (OPENSSL_ppc64le_hwcap2 & PPC_FEATURE2_HAS_VCRYPTO) != 0;
}
#endif // OPENSSL_PPC64LE

View File

@ -0,0 +1,215 @@
/* Copyright (c) 2014, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_crypto.h>
#include <CBigNumBoringSSL_cpu.h>
#include "internal.h"
#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_STATIC_ARMCAP) && \
(defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
defined(OPENSSL_PPC64LE))
// x86, x86_64, the ARMs and ppc64le need to record the result of a
// cpuid/getauxval call for the asm to work correctly, unless compiled without
// asm code.
#define NEED_CPUID
#else
// Otherwise, don't emit a static initialiser.
#if !defined(BORINGSSL_NO_STATIC_INITIALIZER)
#define BORINGSSL_NO_STATIC_INITIALIZER
#endif
#endif // !NO_ASM && !STATIC_ARMCAP &&
// (X86 || X86_64 || ARM || AARCH64 || PPC64LE)
// Our assembly does not use the GOT to reference symbols, which means
// references to visible symbols will often require a TEXTREL. This is
// undesirable, so all assembly-referenced symbols should be hidden. CPU
// capabilities are the only such symbols defined in C. Explicitly hide them,
// rather than rely on being built with -fvisibility=hidden.
#if defined(OPENSSL_WINDOWS)
#define HIDDEN
#else
#define HIDDEN __attribute__((visibility("hidden")))
#endif
// The capability variables are defined in this file in order to work around a
// linker bug. When linking with a .a, if no symbols in a .o are referenced
// then the .o is discarded, even if it has constructor functions.
//
// This still means that any binaries that don't include some functionality
// that tests the capability values will still skip the constructor but, so
// far, the init constructor function only sets the capability variables.
#if defined(BORINGSSL_DISPATCH_TEST)
// This value must be explicitly initialised to zero in order to work around a
// bug in libtool or the linker on OS X.
//
// If not initialised then it becomes a "common symbol". When put into an
// archive, linking on OS X will fail to resolve common symbols. By
// initialising it to zero, it becomes a "data symbol", which isn't so
// affected.
HIDDEN uint8_t BORINGSSL_function_hit[7] = {0};
#endif
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
// This value must be explicitly initialized to zero. See similar comment above.
HIDDEN uint32_t OPENSSL_ia32cap_P[4] = {0};
#elif defined(OPENSSL_PPC64LE)
HIDDEN unsigned long OPENSSL_ppc64le_hwcap2 = 0;
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#include <CBigNumBoringSSL_arm_arch.h>
#if defined(OPENSSL_STATIC_ARMCAP)
HIDDEN uint32_t OPENSSL_armcap_P =
#if defined(OPENSSL_STATIC_ARMCAP_NEON) || \
(defined(__ARM_NEON__) || defined(__ARM_NEON))
ARMV7_NEON |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_AES) || defined(__ARM_FEATURE_CRYPTO)
ARMV8_AES |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_SHA1) || defined(__ARM_FEATURE_CRYPTO)
ARMV8_SHA1 |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_SHA256) || defined(__ARM_FEATURE_CRYPTO)
ARMV8_SHA256 |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_PMULL) || defined(__ARM_FEATURE_CRYPTO)
ARMV8_PMULL |
#endif
0;
#else
HIDDEN uint32_t OPENSSL_armcap_P = 0;
uint32_t *OPENSSL_get_armcap_pointer_for_test(void) {
return &OPENSSL_armcap_P;
}
#endif
#endif
#if defined(BORINGSSL_FIPS)
// In FIPS mode, the power-on self-test function calls |CRYPTO_library_init|
// because we have to ensure that CPUID detection occurs first.
#define BORINGSSL_NO_STATIC_INITIALIZER
#endif
#if defined(OPENSSL_WINDOWS) && !defined(BORINGSSL_NO_STATIC_INITIALIZER)
#define OPENSSL_CDECL __cdecl
#else
#define OPENSSL_CDECL
#endif
#if defined(BORINGSSL_NO_STATIC_INITIALIZER)
static CRYPTO_once_t once = CRYPTO_ONCE_INIT;
#elif defined(_MSC_VER)
#pragma section(".CRT$XCU", read)
static void __cdecl do_library_init(void);
__declspec(allocate(".CRT$XCU")) void(*library_init_constructor)(void) =
do_library_init;
#else
static void do_library_init(void) __attribute__ ((constructor));
#endif
// do_library_init is the actual initialization function. If
// BORINGSSL_NO_STATIC_INITIALIZER isn't defined, this is set as a static
// initializer. Otherwise, it is called by CRYPTO_library_init.
static void OPENSSL_CDECL do_library_init(void) {
// WARNING: this function may only configure the capability variables. See the
// note above about the linker bug.
#if defined(NEED_CPUID)
OPENSSL_cpuid_setup();
#endif
}
void CRYPTO_library_init(void) {
// TODO(davidben): It would be tidier if this build knob could be replaced
// with an internal lazy-init mechanism that would handle things correctly
// in-library. https://crbug.com/542879
#if defined(BORINGSSL_NO_STATIC_INITIALIZER)
CRYPTO_once(&once, do_library_init);
#endif
}
int CRYPTO_is_confidential_build(void) {
#if defined(BORINGSSL_CONFIDENTIAL)
return 1;
#else
return 0;
#endif
}
int CRYPTO_has_asm(void) {
#if defined(OPENSSL_NO_ASM)
return 0;
#else
return 1;
#endif
}
const char *SSLeay_version(int which) { return OpenSSL_version(which); }
const char *OpenSSL_version(int which) {
switch (which) {
case OPENSSL_VERSION:
return "BoringSSL";
case OPENSSL_CFLAGS:
return "compiler: n/a";
case OPENSSL_BUILT_ON:
return "built on: n/a";
case OPENSSL_PLATFORM:
return "platform: n/a";
case OPENSSL_DIR:
return "OPENSSLDIR: n/a";
default:
return "not available";
}
}
unsigned long SSLeay(void) { return OPENSSL_VERSION_NUMBER; }
unsigned long OpenSSL_version_num(void) { return OPENSSL_VERSION_NUMBER; }
int CRYPTO_malloc_init(void) { return 1; }
int OPENSSL_malloc_init(void) { return 1; }
void ENGINE_load_builtin_engines(void) {}
int ENGINE_register_all_complete(void) { return 1; }
void OPENSSL_load_builtin_modules(void) {}
int OPENSSL_init_crypto(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings) {
CRYPTO_library_init();
return 1;
}
void OPENSSL_cleanup(void) {}
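/*
 * Minimal usage sketch (illustrative; not part of the vendored file): when
 * built with BORINGSSL_NO_STATIC_INITIALIZER the capability variables are
 * only set once CRYPTO_library_init runs, so applications call it (or the
 * OpenSSL-compatible OPENSSL_init_crypto) before any other crypto use.
 */
static void example_library_init(void) {
  CRYPTO_library_init();         // no-op or CRYPTO_once-guarded init, safe to repeat
  OPENSSL_init_crypto(0, NULL);  // compatibility shim, also runs the init
}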

View File

@ -0,0 +1,850 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/
/* ====================================================================
* Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_err.h>
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <string.h>
#if defined(OPENSSL_WINDOWS)
OPENSSL_MSVC_PRAGMA(warning(push, 3))
#include <windows.h>
OPENSSL_MSVC_PRAGMA(warning(pop))
#endif
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_thread.h>
#include "../internal.h"
#include "./internal.h"
struct err_error_st {
// file contains the filename where the error occurred.
const char *file;
// data contains a NUL-terminated string with optional data. It must be freed
// with |OPENSSL_free|.
char *data;
// packed contains the error library and reason, as packed by ERR_PACK.
uint32_t packed;
// line contains the line number where the error occurred.
uint16_t line;
// mark indicates a reversion point in the queue. See |ERR_pop_to_mark|.
unsigned mark : 1;
};
// ERR_STATE contains the per-thread, error queue.
typedef struct err_state_st {
// errors contains the ERR_NUM_ERRORS most recent errors, organised as a ring
// buffer.
struct err_error_st errors[ERR_NUM_ERRORS];
// top contains the index one past the most recent error. If |top| equals
// |bottom| then the queue is empty.
unsigned top;
// bottom contains the index of the last error in the queue.
unsigned bottom;
// to_free, if not NULL, contains a pointer owned by this structure that was
// previously a |data| pointer of one of the elements of |errors|.
void *to_free;
} ERR_STATE;
extern const uint32_t kOpenSSLReasonValues[];
extern const size_t kOpenSSLReasonValuesLen;
extern const char kOpenSSLReasonStringData[];
// err_clear clears the given queued error.
static void err_clear(struct err_error_st *error) {
OPENSSL_free(error->data);
OPENSSL_memset(error, 0, sizeof(struct err_error_st));
}
static void err_copy(struct err_error_st *dst, const struct err_error_st *src) {
err_clear(dst);
dst->file = src->file;
if (src->data != NULL) {
dst->data = OPENSSL_strdup(src->data);
}
dst->packed = src->packed;
dst->line = src->line;
}
// global_next_library contains the next custom library value to return.
static int global_next_library = ERR_NUM_LIBS;
// global_next_library_mutex protects |global_next_library| from concurrent
// updates.
static struct CRYPTO_STATIC_MUTEX global_next_library_mutex =
CRYPTO_STATIC_MUTEX_INIT;
static void err_state_free(void *statep) {
ERR_STATE *state = statep;
if (state == NULL) {
return;
}
for (unsigned i = 0; i < ERR_NUM_ERRORS; i++) {
err_clear(&state->errors[i]);
}
OPENSSL_free(state->to_free);
OPENSSL_free(state);
}
// err_get_state gets the ERR_STATE object for the current thread.
static ERR_STATE *err_get_state(void) {
ERR_STATE *state = CRYPTO_get_thread_local(OPENSSL_THREAD_LOCAL_ERR);
if (state == NULL) {
state = OPENSSL_malloc(sizeof(ERR_STATE));
if (state == NULL) {
return NULL;
}
OPENSSL_memset(state, 0, sizeof(ERR_STATE));
if (!CRYPTO_set_thread_local(OPENSSL_THREAD_LOCAL_ERR, state,
err_state_free)) {
return NULL;
}
}
return state;
}
static uint32_t get_error_values(int inc, int top, const char **file, int *line,
const char **data, int *flags) {
unsigned i = 0;
ERR_STATE *state;
struct err_error_st *error;
uint32_t ret;
state = err_get_state();
if (state == NULL || state->bottom == state->top) {
return 0;
}
if (top) {
assert(!inc);
// last error
i = state->top;
} else {
i = (state->bottom + 1) % ERR_NUM_ERRORS;
}
error = &state->errors[i];
ret = error->packed;
if (file != NULL && line != NULL) {
if (error->file == NULL) {
*file = "NA";
*line = 0;
} else {
*file = error->file;
*line = error->line;
}
}
if (data != NULL) {
if (error->data == NULL) {
*data = "";
if (flags != NULL) {
*flags = 0;
}
} else {
*data = error->data;
if (flags != NULL) {
*flags = ERR_FLAG_STRING;
}
// If this error is being removed, take ownership of data from
// the error. The semantics are such that the caller doesn't
// take ownership either. Instead the error system takes
// ownership and retains it until the next call that affects the
// error queue.
if (inc) {
if (error->data != NULL) {
OPENSSL_free(state->to_free);
state->to_free = error->data;
}
error->data = NULL;
}
}
}
if (inc) {
assert(!top);
err_clear(error);
state->bottom = i;
}
return ret;
}
uint32_t ERR_get_error(void) {
return get_error_values(1 /* inc */, 0 /* bottom */, NULL, NULL, NULL, NULL);
}
uint32_t ERR_get_error_line(const char **file, int *line) {
return get_error_values(1 /* inc */, 0 /* bottom */, file, line, NULL, NULL);
}
uint32_t ERR_get_error_line_data(const char **file, int *line,
const char **data, int *flags) {
return get_error_values(1 /* inc */, 0 /* bottom */, file, line, data, flags);
}
uint32_t ERR_peek_error(void) {
return get_error_values(0 /* peek */, 0 /* bottom */, NULL, NULL, NULL, NULL);
}
uint32_t ERR_peek_error_line(const char **file, int *line) {
return get_error_values(0 /* peek */, 0 /* bottom */, file, line, NULL, NULL);
}
uint32_t ERR_peek_error_line_data(const char **file, int *line,
const char **data, int *flags) {
return get_error_values(0 /* peek */, 0 /* bottom */, file, line, data,
flags);
}
uint32_t ERR_peek_last_error(void) {
return get_error_values(0 /* peek */, 1 /* top */, NULL, NULL, NULL, NULL);
}
uint32_t ERR_peek_last_error_line(const char **file, int *line) {
return get_error_values(0 /* peek */, 1 /* top */, file, line, NULL, NULL);
}
uint32_t ERR_peek_last_error_line_data(const char **file, int *line,
const char **data, int *flags) {
return get_error_values(0 /* peek */, 1 /* top */, file, line, data, flags);
}
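/*
 * Usage sketch of the queue accessors above (illustrative; not part of the
 * vendored file). ERR_put_error is defined later in this file; ERR_LIB_SYS
 * with reason 0 records errno.
 */
static void example_error_queue(void) {
  ERR_put_error(ERR_LIB_SYS, 0 /* unused */, 0 /* errno */, __FILE__, __LINE__);
  const char *file, *data;
  int line, flags;
  uint32_t packed = ERR_peek_error();  // inspect without removing
  packed = ERR_get_error_line_data(&file, &line, &data, &flags);  // remove oldest
  (void)packed;
  ERR_clear_error();  // discard anything still queued
}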
void ERR_clear_error(void) {
ERR_STATE *const state = err_get_state();
unsigned i;
if (state == NULL) {
return;
}
for (i = 0; i < ERR_NUM_ERRORS; i++) {
err_clear(&state->errors[i]);
}
OPENSSL_free(state->to_free);
state->to_free = NULL;
state->top = state->bottom = 0;
}
void ERR_remove_thread_state(const CRYPTO_THREADID *tid) {
if (tid != NULL) {
assert(0);
return;
}
ERR_clear_error();
}
int ERR_get_next_error_library(void) {
int ret;
CRYPTO_STATIC_MUTEX_lock_write(&global_next_library_mutex);
ret = global_next_library++;
CRYPTO_STATIC_MUTEX_unlock_write(&global_next_library_mutex);
return ret;
}
void ERR_remove_state(unsigned long pid) {
ERR_clear_error();
}
void ERR_clear_system_error(void) {
errno = 0;
}
char *ERR_error_string(uint32_t packed_error, char *ret) {
static char buf[ERR_ERROR_STRING_BUF_LEN];
if (ret == NULL) {
// TODO(fork): remove this.
ret = buf;
}
#if !defined(NDEBUG)
// This is aimed to help catch callers who don't provide
// |ERR_ERROR_STRING_BUF_LEN| bytes of space.
OPENSSL_memset(ret, 0, ERR_ERROR_STRING_BUF_LEN);
#endif
return ERR_error_string_n(packed_error, ret, ERR_ERROR_STRING_BUF_LEN);
}
char *ERR_error_string_n(uint32_t packed_error, char *buf, size_t len) {
char lib_buf[64], reason_buf[64];
const char *lib_str, *reason_str;
unsigned lib, reason;
if (len == 0) {
return NULL;
}
lib = ERR_GET_LIB(packed_error);
reason = ERR_GET_REASON(packed_error);
lib_str = ERR_lib_error_string(packed_error);
reason_str = ERR_reason_error_string(packed_error);
if (lib_str == NULL) {
BIO_snprintf(lib_buf, sizeof(lib_buf), "lib(%u)", lib);
lib_str = lib_buf;
}
if (reason_str == NULL) {
BIO_snprintf(reason_buf, sizeof(reason_buf), "reason(%u)", reason);
reason_str = reason_buf;
}
BIO_snprintf(buf, len, "error:%08" PRIx32 ":%s:OPENSSL_internal:%s",
packed_error, lib_str, reason_str);
if (strlen(buf) == len - 1) {
// output may be truncated; make sure we always have 5 colon-separated
// fields, i.e. 4 colons.
static const unsigned num_colons = 4;
unsigned i;
char *s = buf;
if (len <= num_colons) {
// In this situation it's not possible to ensure that the correct number
// of colons are included in the output.
return buf;
}
for (i = 0; i < num_colons; i++) {
char *colon = strchr(s, ':');
char *last_pos = &buf[len - 1] - num_colons + i;
if (colon == NULL || colon > last_pos) {
// set colon |i| at last possible position (buf[len-1] is the
// terminating 0). If we're setting this colon, then the whole of the
// rest of the string must be colons in order to have the correct
// number.
OPENSSL_memset(last_pos, ':', num_colons - i);
break;
}
s = colon + 1;
}
}
return buf;
}
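/*
 * Formatting sketch (illustrative; not part of the vendored file): callers
 * should provide ERR_ERROR_STRING_BUF_LEN bytes. The result always carries
 * the five colon-separated fields described above, even when truncated.
 */
static void example_format_error(uint32_t packed_error) {
  char buf[ERR_ERROR_STRING_BUF_LEN];
  ERR_error_string_n(packed_error, buf, sizeof(buf));
  // buf now holds "error:<hex>:<library>:OPENSSL_internal:<reason>"
}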
// err_string_cmp is a compare function for searching error values with
// |bsearch| in |err_string_lookup|.
static int err_string_cmp(const void *a, const void *b) {
const uint32_t a_key = *((const uint32_t*) a) >> 15;
const uint32_t b_key = *((const uint32_t*) b) >> 15;
if (a_key < b_key) {
return -1;
} else if (a_key > b_key) {
return 1;
} else {
return 0;
}
}
// err_string_lookup looks up the string associated with |lib| and |key| in
// |values| and |string_data|. It returns the string or NULL if not found.
static const char *err_string_lookup(uint32_t lib, uint32_t key,
const uint32_t *values,
size_t num_values,
const char *string_data) {
// |values| points to data in err_data.h, which is generated by
// err_data_generate.go. It's an array of uint32_t values. Each value has the
// following structure:
// | lib | key | offset |
// |6 bits| 11 bits | 15 bits |
//
// The |lib| value is a library identifier: one of the |ERR_LIB_*| values.
// The |key| is a reason code, depending on the context.
// The |offset| is the number of bytes from the start of |string_data| where
// the (NUL terminated) string for this value can be found.
//
// Values are sorted based on treating the |lib| and |key| part as an
// unsigned integer.
if (lib >= (1 << 6) || key >= (1 << 11)) {
return NULL;
}
uint32_t search_key = lib << 26 | key << 15;
const uint32_t *result = bsearch(&search_key, values, num_values,
sizeof(uint32_t), err_string_cmp);
if (result == NULL) {
return NULL;
}
return &string_data[(*result) & 0x7fff];
}
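/*
 * Sketch of the packed lookup value documented above (illustrative only):
 * 6 bits of library, 11 bits of reason key, 15 bits of string offset. The
 * search key used by err_string_lookup leaves the offset bits zero.
 */
static uint32_t example_pack_reason_value(uint32_t lib, uint32_t key,
                                          uint32_t offset) {
  // Caller must ensure lib < 64, key < 2048 and offset < 32768.
  return (lib << 26) | (key << 15) | offset;
}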
static const char *const kLibraryNames[ERR_NUM_LIBS] = {
"invalid library (0)",
"unknown library", // ERR_LIB_NONE
"system library", // ERR_LIB_SYS
"bignum routines", // ERR_LIB_BN
"RSA routines", // ERR_LIB_RSA
"Diffie-Hellman routines", // ERR_LIB_DH
"public key routines", // ERR_LIB_EVP
"memory buffer routines", // ERR_LIB_BUF
"object identifier routines", // ERR_LIB_OBJ
"PEM routines", // ERR_LIB_PEM
"DSA routines", // ERR_LIB_DSA
"X.509 certificate routines", // ERR_LIB_X509
"ASN.1 encoding routines", // ERR_LIB_ASN1
"configuration file routines", // ERR_LIB_CONF
"common libcrypto routines", // ERR_LIB_CRYPTO
"elliptic curve routines", // ERR_LIB_EC
"SSL routines", // ERR_LIB_SSL
"BIO routines", // ERR_LIB_BIO
"PKCS7 routines", // ERR_LIB_PKCS7
"PKCS8 routines", // ERR_LIB_PKCS8
"X509 V3 routines", // ERR_LIB_X509V3
"random number generator", // ERR_LIB_RAND
"ENGINE routines", // ERR_LIB_ENGINE
"OCSP routines", // ERR_LIB_OCSP
"UI routines", // ERR_LIB_UI
"COMP routines", // ERR_LIB_COMP
"ECDSA routines", // ERR_LIB_ECDSA
"ECDH routines", // ERR_LIB_ECDH
"HMAC routines", // ERR_LIB_HMAC
"Digest functions", // ERR_LIB_DIGEST
"Cipher functions", // ERR_LIB_CIPHER
"HKDF functions", // ERR_LIB_HKDF
"Trust Token functions", // ERR_LIB_TRUST_TOKEN
"User defined functions", // ERR_LIB_USER
};
const char *ERR_lib_error_string(uint32_t packed_error) {
const uint32_t lib = ERR_GET_LIB(packed_error);
if (lib >= ERR_NUM_LIBS) {
return NULL;
}
return kLibraryNames[lib];
}
const char *ERR_func_error_string(uint32_t packed_error) {
return "OPENSSL_internal";
}
const char *ERR_reason_error_string(uint32_t packed_error) {
const uint32_t lib = ERR_GET_LIB(packed_error);
const uint32_t reason = ERR_GET_REASON(packed_error);
if (lib == ERR_LIB_SYS) {
if (reason < 127) {
return strerror(reason);
}
return NULL;
}
if (reason < ERR_NUM_LIBS) {
return kLibraryNames[reason];
}
if (reason < 100) {
switch (reason) {
case ERR_R_MALLOC_FAILURE:
return "malloc failure";
case ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED:
return "function should not have been called";
case ERR_R_PASSED_NULL_PARAMETER:
return "passed a null parameter";
case ERR_R_INTERNAL_ERROR:
return "internal error";
case ERR_R_OVERFLOW:
return "overflow";
default:
return NULL;
}
}
return err_string_lookup(lib, reason, kOpenSSLReasonValues,
kOpenSSLReasonValuesLen, kOpenSSLReasonStringData);
}
void ERR_print_errors_cb(ERR_print_errors_callback_t callback, void *ctx) {
char buf[ERR_ERROR_STRING_BUF_LEN];
char buf2[1024];
const char *file, *data;
int line, flags;
uint32_t packed_error;
// thread_hash is the least-significant bits of the |ERR_STATE| pointer value
// for this thread.
const unsigned long thread_hash = (uintptr_t) err_get_state();
for (;;) {
packed_error = ERR_get_error_line_data(&file, &line, &data, &flags);
if (packed_error == 0) {
break;
}
ERR_error_string_n(packed_error, buf, sizeof(buf));
BIO_snprintf(buf2, sizeof(buf2), "%lu:%s:%s:%d:%s\n", thread_hash, buf,
file, line, (flags & ERR_FLAG_STRING) ? data : "");
if (callback(buf2, strlen(buf2), ctx) <= 0) {
break;
}
}
}
static int print_errors_to_file(const char* msg, size_t msg_len, void* ctx) {
assert(msg[msg_len] == '\0');
FILE* fp = ctx;
int res = fputs(msg, fp);
return res < 0 ? 0 : 1;
}
void ERR_print_errors_fp(FILE *file) {
ERR_print_errors_cb(print_errors_to_file, file);
}
// err_set_error_data sets the data on the most recent error.
static void err_set_error_data(char *data) {
ERR_STATE *const state = err_get_state();
struct err_error_st *error;
if (state == NULL || state->top == state->bottom) {
OPENSSL_free(data);
return;
}
error = &state->errors[state->top];
OPENSSL_free(error->data);
error->data = data;
}
void ERR_put_error(int library, int unused, int reason, const char *file,
unsigned line) {
ERR_STATE *const state = err_get_state();
struct err_error_st *error;
if (state == NULL) {
return;
}
if (library == ERR_LIB_SYS && reason == 0) {
#if defined(OPENSSL_WINDOWS)
reason = GetLastError();
#else
reason = errno;
#endif
}
state->top = (state->top + 1) % ERR_NUM_ERRORS;
if (state->top == state->bottom) {
state->bottom = (state->bottom + 1) % ERR_NUM_ERRORS;
}
error = &state->errors[state->top];
err_clear(error);
error->file = file;
error->line = line;
error->packed = ERR_PACK(library, reason);
}
// ERR_add_error_data_vdata takes a variable number of const char* pointers,
// concatenates them and sets the result as the data on the most recent
// error.
static void err_add_error_vdata(unsigned num, va_list args) {
size_t alloced, new_len, len = 0, substr_len;
char *buf;
const char *substr;
unsigned i;
alloced = 80;
buf = OPENSSL_malloc(alloced + 1);
if (buf == NULL) {
return;
}
for (i = 0; i < num; i++) {
substr = va_arg(args, const char *);
if (substr == NULL) {
continue;
}
substr_len = strlen(substr);
new_len = len + substr_len;
if (new_len > alloced) {
char *new_buf;
if (alloced + 20 + 1 < alloced) {
// overflow.
OPENSSL_free(buf);
return;
}
alloced = new_len + 20;
new_buf = OPENSSL_realloc(buf, alloced + 1);
if (new_buf == NULL) {
OPENSSL_free(buf);
return;
}
buf = new_buf;
}
OPENSSL_memcpy(buf + len, substr, substr_len);
len = new_len;
}
buf[len] = 0;
err_set_error_data(buf);
}
void ERR_add_error_data(unsigned count, ...) {
va_list args;
va_start(args, count);
err_add_error_vdata(count, args);
va_end(args);
}
void ERR_add_error_dataf(const char *format, ...) {
va_list ap;
char *buf;
static const unsigned buf_len = 256;
// A fixed-size buffer is used because va_copy (which would be needed in
// order to call vsnprintf twice and measure the buffer) wasn't defined until
// C99.
buf = OPENSSL_malloc(buf_len + 1);
if (buf == NULL) {
return;
}
va_start(ap, format);
BIO_vsnprintf(buf, buf_len, format, ap);
buf[buf_len] = 0;
va_end(ap);
err_set_error_data(buf);
}
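/*
 * Usage sketch (illustrative; not part of the vendored file): attach extra
 * detail to the most recently queued error. The formatted data is capped at
 * 256 bytes as noted above.
 */
static void example_add_error_detail(int saved_errno) {
  ERR_put_error(ERR_LIB_SYS, 0, saved_errno, __FILE__, __LINE__);
  ERR_add_error_dataf("open failed (errno %d)", saved_errno);
}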
int ERR_set_mark(void) {
ERR_STATE *const state = err_get_state();
if (state == NULL || state->bottom == state->top) {
return 0;
}
state->errors[state->top].mark = 1;
return 1;
}
int ERR_pop_to_mark(void) {
ERR_STATE *const state = err_get_state();
if (state == NULL) {
return 0;
}
while (state->bottom != state->top) {
struct err_error_st *error = &state->errors[state->top];
if (error->mark) {
error->mark = 0;
return 1;
}
err_clear(error);
if (state->top == 0) {
state->top = ERR_NUM_ERRORS - 1;
} else {
state->top--;
}
}
return 0;
}
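/*
 * Usage sketch (illustrative; not part of the vendored file): the mark
 * functions let a caller attempt an operation speculatively and discard the
 * errors it queued when a fallback succeeds instead.
 */
static int example_try_then_fallback(int (*primary)(void), int (*fallback)(void)) {
  ERR_set_mark();
  if (primary()) {
    return 1;
  }
  if (fallback()) {
    ERR_pop_to_mark();  // drop errors queued by the failed primary attempt
    return 1;
  }
  return 0;  // leave both attempts' errors for the caller to report
}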
void ERR_load_crypto_strings(void) {}
void ERR_free_strings(void) {}
void ERR_load_BIO_strings(void) {}
void ERR_load_ERR_strings(void) {}
void ERR_load_RAND_strings(void) {}
struct err_save_state_st {
struct err_error_st *errors;
size_t num_errors;
};
void ERR_SAVE_STATE_free(ERR_SAVE_STATE *state) {
if (state == NULL) {
return;
}
for (size_t i = 0; i < state->num_errors; i++) {
err_clear(&state->errors[i]);
}
OPENSSL_free(state->errors);
OPENSSL_free(state);
}
ERR_SAVE_STATE *ERR_save_state(void) {
ERR_STATE *const state = err_get_state();
if (state == NULL || state->top == state->bottom) {
return NULL;
}
ERR_SAVE_STATE *ret = OPENSSL_malloc(sizeof(ERR_SAVE_STATE));
if (ret == NULL) {
return NULL;
}
// Errors are stored in the range (bottom, top].
size_t num_errors = state->top >= state->bottom
? state->top - state->bottom
: ERR_NUM_ERRORS + state->top - state->bottom;
assert(num_errors < ERR_NUM_ERRORS);
ret->errors = OPENSSL_malloc(num_errors * sizeof(struct err_error_st));
if (ret->errors == NULL) {
OPENSSL_free(ret);
return NULL;
}
OPENSSL_memset(ret->errors, 0, num_errors * sizeof(struct err_error_st));
ret->num_errors = num_errors;
for (size_t i = 0; i < num_errors; i++) {
size_t j = (state->bottom + i + 1) % ERR_NUM_ERRORS;
err_copy(&ret->errors[i], &state->errors[j]);
}
return ret;
}
void ERR_restore_state(const ERR_SAVE_STATE *state) {
if (state == NULL || state->num_errors == 0) {
ERR_clear_error();
return;
}
ERR_STATE *const dst = err_get_state();
if (dst == NULL) {
return;
}
for (size_t i = 0; i < state->num_errors; i++) {
err_copy(&dst->errors[i], &state->errors[i]);
}
dst->top = state->num_errors - 1;
dst->bottom = ERR_NUM_ERRORS - 1;
}
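/*
 * Usage sketch for the save/restore helpers above (illustrative; not part
 * of the vendored file): snapshot the queue, run code that may clobber it,
 * then put the snapshot back.
 */
static void example_save_restore(void) {
  ERR_SAVE_STATE *saved = ERR_save_state();  // NULL when the queue is empty
  ERR_clear_error();                         // ...code that disturbs the queue...
  ERR_restore_state(saved);                  // NULL simply clears the queue
  ERR_SAVE_STATE_free(saved);
}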

File diff suppressed because it is too large

View File

@ -0,0 +1,58 @@
/* Copyright (c) 2017, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#ifndef OPENSSL_HEADER_CRYPTO_ERR_INTERNAL_H
#define OPENSSL_HEADER_CRYPTO_ERR_INTERNAL_H
#include <CBigNumBoringSSL_err.h>
#if defined(__cplusplus)
extern "C" {
#endif
// Private error queue functions.
// ERR_SAVE_STATE contains a saved representation of the error queue. It is
// slightly more compact than |ERR_STATE| as the error queue will typically not
// contain |ERR_NUM_ERRORS| entries.
typedef struct err_save_state_st ERR_SAVE_STATE;
// ERR_SAVE_STATE_free releases all memory associated with |state|.
OPENSSL_EXPORT void ERR_SAVE_STATE_free(ERR_SAVE_STATE *state);
// ERR_save_state returns a newly-allocated |ERR_SAVE_STATE| structure
// containing the current state of the error queue or NULL on allocation
// error. It should be released with |ERR_SAVE_STATE_free|.
OPENSSL_EXPORT ERR_SAVE_STATE *ERR_save_state(void);
// ERR_restore_state clears the error queue and replaces it with |state|.
OPENSSL_EXPORT void ERR_restore_state(const ERR_SAVE_STATE *state);
#if defined(__cplusplus)
} // extern C
extern "C++" {
BSSL_NAMESPACE_BEGIN
BORINGSSL_MAKE_DELETER(ERR_SAVE_STATE, ERR_SAVE_STATE_free)
BSSL_NAMESPACE_END
} // extern C++
#endif
#endif // OPENSSL_HEADER_CRYPTO_ERR_INTERNAL_H

View File

@ -0,0 +1,261 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/
/* ====================================================================
* Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_ex_data.h>
#include <assert.h>
#include <string.h>
#include <CBigNumBoringSSL_crypto.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_stack.h>
#include <CBigNumBoringSSL_thread.h>
#include "internal.h"
DEFINE_STACK_OF(CRYPTO_EX_DATA_FUNCS)
struct crypto_ex_data_func_st {
long argl; // Arbitrary long
void *argp; // Arbitrary void pointer
CRYPTO_EX_free *free_func;
};
int CRYPTO_get_ex_new_index(CRYPTO_EX_DATA_CLASS *ex_data_class, int *out_index,
long argl, void *argp, CRYPTO_EX_free *free_func) {
CRYPTO_EX_DATA_FUNCS *funcs;
int ret = 0;
funcs = OPENSSL_malloc(sizeof(CRYPTO_EX_DATA_FUNCS));
if (funcs == NULL) {
OPENSSL_PUT_ERROR(CRYPTO, ERR_R_MALLOC_FAILURE);
return 0;
}
funcs->argl = argl;
funcs->argp = argp;
funcs->free_func = free_func;
CRYPTO_STATIC_MUTEX_lock_write(&ex_data_class->lock);
if (ex_data_class->meth == NULL) {
ex_data_class->meth = sk_CRYPTO_EX_DATA_FUNCS_new_null();
}
if (ex_data_class->meth == NULL ||
!sk_CRYPTO_EX_DATA_FUNCS_push(ex_data_class->meth, funcs)) {
OPENSSL_PUT_ERROR(CRYPTO, ERR_R_MALLOC_FAILURE);
OPENSSL_free(funcs);
goto err;
}
*out_index = sk_CRYPTO_EX_DATA_FUNCS_num(ex_data_class->meth) - 1 +
ex_data_class->num_reserved;
ret = 1;
err:
CRYPTO_STATIC_MUTEX_unlock_write(&ex_data_class->lock);
return ret;
}
int CRYPTO_set_ex_data(CRYPTO_EX_DATA *ad, int index, void *val) {
int n, i;
if (ad->sk == NULL) {
ad->sk = sk_void_new_null();
if (ad->sk == NULL) {
OPENSSL_PUT_ERROR(CRYPTO, ERR_R_MALLOC_FAILURE);
return 0;
}
}
n = sk_void_num(ad->sk);
// Add NULL values until the stack is long enough.
for (i = n; i <= index; i++) {
if (!sk_void_push(ad->sk, NULL)) {
OPENSSL_PUT_ERROR(CRYPTO, ERR_R_MALLOC_FAILURE);
return 0;
}
}
sk_void_set(ad->sk, index, val);
return 1;
}
void *CRYPTO_get_ex_data(const CRYPTO_EX_DATA *ad, int idx) {
if (ad->sk == NULL || idx < 0 || (size_t)idx >= sk_void_num(ad->sk)) {
return NULL;
}
return sk_void_value(ad->sk, idx);
}
// get_func_pointers takes a copy of the CRYPTO_EX_DATA_FUNCS pointers, if any,
// for the given class. If there are some pointers, it sets |*out| to point to
// a fresh stack of them. Otherwise it sets |*out| to NULL. It returns one on
// success or zero on error.
static int get_func_pointers(STACK_OF(CRYPTO_EX_DATA_FUNCS) **out,
CRYPTO_EX_DATA_CLASS *ex_data_class) {
size_t n;
*out = NULL;
// CRYPTO_EX_DATA_FUNCS structures are static once set, so we can take a
// shallow copy of the list under lock and then use the structures without
// the lock held.
CRYPTO_STATIC_MUTEX_lock_read(&ex_data_class->lock);
n = sk_CRYPTO_EX_DATA_FUNCS_num(ex_data_class->meth);
if (n > 0) {
*out = sk_CRYPTO_EX_DATA_FUNCS_dup(ex_data_class->meth);
}
CRYPTO_STATIC_MUTEX_unlock_read(&ex_data_class->lock);
if (n > 0 && *out == NULL) {
OPENSSL_PUT_ERROR(CRYPTO, ERR_R_MALLOC_FAILURE);
return 0;
}
return 1;
}
void CRYPTO_new_ex_data(CRYPTO_EX_DATA *ad) {
ad->sk = NULL;
}
void CRYPTO_free_ex_data(CRYPTO_EX_DATA_CLASS *ex_data_class, void *obj,
CRYPTO_EX_DATA *ad) {
if (ad->sk == NULL) {
// Nothing to do.
return;
}
STACK_OF(CRYPTO_EX_DATA_FUNCS) *func_pointers;
if (!get_func_pointers(&func_pointers, ex_data_class)) {
// TODO(davidben): This leaks memory on malloc error.
return;
}
for (size_t i = 0; i < sk_CRYPTO_EX_DATA_FUNCS_num(func_pointers); i++) {
CRYPTO_EX_DATA_FUNCS *func_pointer =
sk_CRYPTO_EX_DATA_FUNCS_value(func_pointers, i);
if (func_pointer->free_func) {
void *ptr = CRYPTO_get_ex_data(ad, i + ex_data_class->num_reserved);
func_pointer->free_func(obj, ptr, ad, i + ex_data_class->num_reserved,
func_pointer->argl, func_pointer->argp);
}
}
sk_CRYPTO_EX_DATA_FUNCS_free(func_pointers);
sk_void_free(ad->sk);
ad->sk = NULL;
}
void CRYPTO_cleanup_all_ex_data(void) {}
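/*
 * Usage sketch (illustrative; the CRYPTO_EX_DATA_CLASS instance and the
 * wrapper function are assumptions, not part of the vendored file):
 * register an index once, then attach and read back a pointer.
 */
static int g_example_ex_index = -1;

static int example_ex_data(CRYPTO_EX_DATA_CLASS *ex_data_class,
                           CRYPTO_EX_DATA *ad, void *value) {
  if (g_example_ex_index < 0 &&
      !CRYPTO_get_ex_new_index(ex_data_class, &g_example_ex_index, 0, NULL,
                               NULL)) {
    return 0;
  }
  if (!CRYPTO_set_ex_data(ad, g_example_ex_index, value)) {
    return 0;
  }
  return CRYPTO_get_ex_data(ad, g_example_ex_index) == value;
}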

View File

@ -0,0 +1,108 @@
/* ====================================================================
* Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <CBigNumBoringSSL_aes.h>
#include <assert.h>
#include <CBigNumBoringSSL_cpu.h>
#include "internal.h"
#include "../modes/internal.h"
// Be aware that different sets of AES functions use incompatible key
// representations, varying in format of the key schedule, the |AES_KEY.rounds|
// value, or both. Therefore they cannot mix. Also, on AArch64, the plain-C
// code, above, is incompatible with the |aes_hw_*| functions.
void AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
if (hwaes_capable()) {
aes_hw_encrypt(in, out, key);
} else if (vpaes_capable()) {
vpaes_encrypt(in, out, key);
} else {
aes_nohw_encrypt(in, out, key);
}
}
void AES_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
if (hwaes_capable()) {
aes_hw_decrypt(in, out, key);
} else if (vpaes_capable()) {
vpaes_decrypt(in, out, key);
} else {
aes_nohw_decrypt(in, out, key);
}
}
int AES_set_encrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey) {
if (bits != 128 && bits != 192 && bits != 256) {
return -2;
}
if (hwaes_capable()) {
return aes_hw_set_encrypt_key(key, bits, aeskey);
} else if (vpaes_capable()) {
return vpaes_set_encrypt_key(key, bits, aeskey);
} else {
return aes_nohw_set_encrypt_key(key, bits, aeskey);
}
}
int AES_set_decrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey) {
if (bits != 128 && bits != 192 && bits != 256) {
return -2;
}
if (hwaes_capable()) {
return aes_hw_set_decrypt_key(key, bits, aeskey);
} else if (vpaes_capable()) {
return vpaes_set_decrypt_key(key, bits, aeskey);
} else {
return aes_nohw_set_decrypt_key(key, bits, aeskey);
}
}
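/*
 * Usage sketch (illustrative; key and plaintext are placeholder zero
 * values): the dispatch above selects the hardware, vector-permutation or
 * portable implementation behind the same public AES_* entry points.
 */
static void example_aes_encrypt_block(void) {
  static const uint8_t key_bytes[16] = {0};  // demo 128-bit key only
  uint8_t in[16] = {0}, out[16];
  AES_KEY key;
  if (AES_set_encrypt_key(key_bytes, 128, &key) != 0) {
    return;  // only 128-, 192- or 256-bit keys are accepted
  }
  AES_encrypt(in, out, &key);
}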

File diff suppressed because it is too large

View File

@ -0,0 +1,238 @@
/* Copyright (c) 2017, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#ifndef OPENSSL_HEADER_AES_INTERNAL_H
#define OPENSSL_HEADER_AES_INTERNAL_H
#include <stdlib.h>
#include <CBigNumBoringSSL_cpu.h>
#if defined(__cplusplus)
extern "C" {
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define HWAES
#define HWAES_ECB
OPENSSL_INLINE int hwaes_capable(void) {
return (OPENSSL_ia32cap_get()[1] & (1 << (57 - 32))) != 0;
}
#define VPAES
#if defined(OPENSSL_X86_64)
#define VPAES_CTR32
#endif
#define VPAES_CBC
OPENSSL_INLINE int vpaes_capable(void) {
return (OPENSSL_ia32cap_get()[1] & (1 << (41 - 32))) != 0;
}
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#define HWAES
OPENSSL_INLINE int hwaes_capable(void) { return CRYPTO_is_ARMv8_AES_capable(); }
#if defined(OPENSSL_ARM)
#define BSAES
#define VPAES
#define VPAES_CTR32
OPENSSL_INLINE int bsaes_capable(void) { return CRYPTO_is_NEON_capable(); }
OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_NEON_capable(); }
#endif
#if defined(OPENSSL_AARCH64)
#define VPAES
#define VPAES_CBC
#define VPAES_CTR32
OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_NEON_capable(); }
#endif
#elif defined(OPENSSL_PPC64LE)
#define HWAES
OPENSSL_INLINE int hwaes_capable(void) {
return CRYPTO_is_PPC64LE_vcrypto_capable();
}
#endif
#endif // !NO_ASM
#if defined(HWAES)
int aes_hw_set_encrypt_key(const uint8_t *user_key, const int bits,
AES_KEY *key);
int aes_hw_set_decrypt_key(const uint8_t *user_key, const int bits,
AES_KEY *key);
void aes_hw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void aes_hw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void aes_hw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, uint8_t *ivec, const int enc);
void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, const uint8_t ivec[16]);
#else
// If HWAES isn't defined then we provide dummy functions for each of the hwaes
// functions.
OPENSSL_INLINE int hwaes_capable(void) { return 0; }
OPENSSL_INLINE int aes_hw_set_encrypt_key(const uint8_t *user_key, int bits,
AES_KEY *key) {
abort();
}
OPENSSL_INLINE int aes_hw_set_decrypt_key(const uint8_t *user_key, int bits,
AES_KEY *key) {
abort();
}
OPENSSL_INLINE void aes_hw_encrypt(const uint8_t *in, uint8_t *out,
const AES_KEY *key) {
abort();
}
OPENSSL_INLINE void aes_hw_decrypt(const uint8_t *in, uint8_t *out,
const AES_KEY *key) {
abort();
}
OPENSSL_INLINE void aes_hw_cbc_encrypt(const uint8_t *in, uint8_t *out,
size_t length, const AES_KEY *key,
uint8_t *ivec, int enc) {
abort();
}
OPENSSL_INLINE void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
size_t len, const AES_KEY *key,
const uint8_t ivec[16]) {
abort();
}
#endif // !HWAES
#if defined(HWAES_ECB)
void aes_hw_ecb_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, const int enc);
#endif // HWAES_ECB
#if defined(BSAES)
// Note |bsaes_cbc_encrypt| requires |enc| to be zero.
void bsaes_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, uint8_t ivec[16], int enc);
void bsaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, const uint8_t ivec[16]);
// VPAES to BSAES conversions are available on all BSAES platforms.
void vpaes_encrypt_key_to_bsaes(AES_KEY *out_bsaes, const AES_KEY *vpaes);
void vpaes_decrypt_key_to_bsaes(AES_KEY *out_bsaes, const AES_KEY *vpaes);
#else
OPENSSL_INLINE char bsaes_capable(void) { return 0; }
// On other platforms, bsaes_capable() will always return false and so the
// following will never be called.
OPENSSL_INLINE void bsaes_cbc_encrypt(const uint8_t *in, uint8_t *out,
size_t length, const AES_KEY *key,
uint8_t ivec[16], int enc) {
abort();
}
OPENSSL_INLINE void bsaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
size_t len, const AES_KEY *key,
const uint8_t ivec[16]) {
abort();
}
OPENSSL_INLINE void vpaes_encrypt_key_to_bsaes(AES_KEY *out_bsaes,
const AES_KEY *vpaes) {
abort();
}
OPENSSL_INLINE void vpaes_decrypt_key_to_bsaes(AES_KEY *out_bsaes,
const AES_KEY *vpaes) {
abort();
}
#endif // !BSAES
#if defined(VPAES)
// On platforms where VPAES gets defined (just above), then these functions are
// provided by asm.
int vpaes_set_encrypt_key(const uint8_t *userKey, int bits, AES_KEY *key);
int vpaes_set_decrypt_key(const uint8_t *userKey, int bits, AES_KEY *key);
void vpaes_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void vpaes_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
#if defined(VPAES_CBC)
void vpaes_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, uint8_t *ivec, int enc);
#endif
#if defined(VPAES_CTR32)
void vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, const uint8_t ivec[16]);
#endif
#else
OPENSSL_INLINE char vpaes_capable(void) { return 0; }
// On other platforms, vpaes_capable() will always return false and so the
// following will never be called.
OPENSSL_INLINE int vpaes_set_encrypt_key(const uint8_t *userKey, int bits,
AES_KEY *key) {
abort();
}
OPENSSL_INLINE int vpaes_set_decrypt_key(const uint8_t *userKey, int bits,
AES_KEY *key) {
abort();
}
OPENSSL_INLINE void vpaes_encrypt(const uint8_t *in, uint8_t *out,
const AES_KEY *key) {
abort();
}
OPENSSL_INLINE void vpaes_decrypt(const uint8_t *in, uint8_t *out,
const AES_KEY *key) {
abort();
}
OPENSSL_INLINE void vpaes_cbc_encrypt(const uint8_t *in, uint8_t *out,
size_t length, const AES_KEY *key,
uint8_t *ivec, int enc) {
abort();
}
#endif // !VPAES
int aes_nohw_set_encrypt_key(const uint8_t *key, unsigned bits,
AES_KEY *aeskey);
int aes_nohw_set_decrypt_key(const uint8_t *key, unsigned bits,
AES_KEY *aeskey);
void aes_nohw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void aes_nohw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void aes_nohw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
size_t blocks, const AES_KEY *key,
const uint8_t ivec[16]);
void aes_nohw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t *ivec, const int enc);
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_AES_INTERNAL_H

View File

@@ -0,0 +1,236 @@
/* ====================================================================
* Copyright (c) 2001-2011 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <CBigNumBoringSSL_aes.h>
#include <assert.h>
#include <limits.h>
#include <string.h>
#include <CBigNumBoringSSL_mem.h>
#include "../../internal.h"
// kDefaultIV is the default IV value given in RFC 3394, 2.2.3.1.
static const uint8_t kDefaultIV[] = {
0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6,
};
static const unsigned kBound = 6;
int AES_wrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
const uint8_t *in, size_t in_len) {
// See RFC 3394, section 2.2.1. Additionally, note that section 2 requires the
// plaintext be at least two 8-byte blocks.
if (in_len > INT_MAX - 8 || in_len < 16 || in_len % 8 != 0) {
return -1;
}
if (iv == NULL) {
iv = kDefaultIV;
}
OPENSSL_memmove(out + 8, in, in_len);
uint8_t A[AES_BLOCK_SIZE];
OPENSSL_memcpy(A, iv, 8);
size_t n = in_len / 8;
for (unsigned j = 0; j < kBound; j++) {
for (size_t i = 1; i <= n; i++) {
OPENSSL_memcpy(A + 8, out + 8 * i, 8);
AES_encrypt(A, A, key);
uint32_t t = (uint32_t)(n * j + i);
A[7] ^= t & 0xff;
A[6] ^= (t >> 8) & 0xff;
A[5] ^= (t >> 16) & 0xff;
A[4] ^= (t >> 24) & 0xff;
OPENSSL_memcpy(out + 8 * i, A + 8, 8);
}
}
OPENSSL_memcpy(out, A, 8);
return (int)in_len + 8;
}
// aes_unwrap_key_inner performs steps one and two from
// https://tools.ietf.org/html/rfc3394#section-2.2.2
static int aes_unwrap_key_inner(const AES_KEY *key, uint8_t *out,
uint8_t out_iv[8], const uint8_t *in,
size_t in_len) {
// See RFC 3394, section 2.2.2. Additionally, note that section 2 requires the
// plaintext be at least two 8-byte blocks, so the ciphertext must be at least
// three blocks.
if (in_len > INT_MAX || in_len < 24 || in_len % 8 != 0) {
return 0;
}
uint8_t A[AES_BLOCK_SIZE];
OPENSSL_memcpy(A, in, 8);
OPENSSL_memmove(out, in + 8, in_len - 8);
size_t n = (in_len / 8) - 1;
for (unsigned j = kBound - 1; j < kBound; j--) {
for (size_t i = n; i > 0; i--) {
uint32_t t = (uint32_t)(n * j + i);
A[7] ^= t & 0xff;
A[6] ^= (t >> 8) & 0xff;
A[5] ^= (t >> 16) & 0xff;
A[4] ^= (t >> 24) & 0xff;
OPENSSL_memcpy(A + 8, out + 8 * (i - 1), 8);
AES_decrypt(A, A, key);
OPENSSL_memcpy(out + 8 * (i - 1), A + 8, 8);
}
}
memcpy(out_iv, A, 8);
return 1;
}
int AES_unwrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
const uint8_t *in, size_t in_len) {
uint8_t calculated_iv[8];
if (!aes_unwrap_key_inner(key, out, calculated_iv, in, in_len)) {
return -1;
}
if (iv == NULL) {
iv = kDefaultIV;
}
if (CRYPTO_memcmp(calculated_iv, iv, 8) != 0) {
return -1;
}
return (int)in_len - 8;
}
// kPaddingConstant is used in Key Wrap with Padding. See
// https://tools.ietf.org/html/rfc5649#section-3
static const uint8_t kPaddingConstant[4] = {0xa6, 0x59, 0x59, 0xa6};
int AES_wrap_key_padded(const AES_KEY *key, uint8_t *out, size_t *out_len,
size_t max_out, const uint8_t *in, size_t in_len) {
// See https://tools.ietf.org/html/rfc5649#section-4.1
const uint32_t in_len32_be = CRYPTO_bswap4(in_len);
const uint64_t in_len64 = in_len;
const size_t padded_len = (in_len + 7) & ~7;
*out_len = 0;
if (in_len == 0 || in_len64 > 0xffffffffu || in_len + 7 < in_len ||
padded_len + 8 < padded_len || max_out < padded_len + 8) {
return 0;
}
uint8_t block[AES_BLOCK_SIZE];
memcpy(block, kPaddingConstant, sizeof(kPaddingConstant));
memcpy(block + 4, &in_len32_be, sizeof(in_len32_be));
if (in_len <= 8) {
memset(block + 8, 0, 8);
memcpy(block + 8, in, in_len);
AES_encrypt(block, out, key);
*out_len = AES_BLOCK_SIZE;
return 1;
}
uint8_t *padded_in = OPENSSL_malloc(padded_len);
if (padded_in == NULL) {
return 0;
}
assert(padded_len >= 8);
memset(padded_in + padded_len - 8, 0, 8);
memcpy(padded_in, in, in_len);
const int ret = AES_wrap_key(key, block, out, padded_in, padded_len);
OPENSSL_free(padded_in);
if (ret < 0) {
return 0;
}
*out_len = ret;
return 1;
}
int AES_unwrap_key_padded(const AES_KEY *key, uint8_t *out, size_t *out_len,
size_t max_out, const uint8_t *in, size_t in_len) {
*out_len = 0;
if (in_len < AES_BLOCK_SIZE || max_out < in_len - 8) {
return 0;
}
uint8_t iv[8];
if (in_len == AES_BLOCK_SIZE) {
uint8_t block[AES_BLOCK_SIZE];
AES_decrypt(in, block, key);
memcpy(iv, block, sizeof(iv));
memcpy(out, block + 8, 8);
} else if (!aes_unwrap_key_inner(key, out, iv, in, in_len)) {
return 0;
}
assert(in_len % 8 == 0);
crypto_word_t ok = constant_time_eq_int(
CRYPTO_memcmp(iv, kPaddingConstant, sizeof(kPaddingConstant)), 0);
uint32_t claimed_len32;
memcpy(&claimed_len32, iv + 4, sizeof(claimed_len32));
const size_t claimed_len = CRYPTO_bswap4(claimed_len32);
ok &= ~constant_time_is_zero_w(claimed_len);
ok &= constant_time_eq_w((claimed_len - 1) >> 3, (in_len - 9) >> 3);
// Check that padding bytes are all zero.
for (size_t i = in_len - 15; i < in_len - 8; i++) {
ok &= constant_time_is_zero_w(constant_time_ge_8(i, claimed_len) & out[i]);
}
*out_len = constant_time_select_w(ok, claimed_len, 0);
return ok & 1;
}
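
AES_wrap_key and AES_unwrap_key implement RFC 3394 key wrap (the input must be a multiple of 8 bytes and at least 16 bytes long, and the output is the input length plus 8), while the *_padded variants add the RFC 5649 padding scheme for arbitrary lengths. A hedged round-trip sketch against these public entry points; the 32-byte key-encryption key and the 16-byte secret are made-up placeholders:

#include <CBigNumBoringSSL_aes.h>
#include <stdint.h>

// Wrap a 16-byte key under a 256-bit KEK using the default RFC 3394 IV,
// then unwrap it again. Wrapping uses an encryption key schedule,
// unwrapping a decryption key schedule.
static int wrap_round_trip(const uint8_t kek_bytes[32],
                           const uint8_t secret[16]) {
  AES_KEY wrap_key, unwrap_key;
  uint8_t wrapped[16 + 8];
  uint8_t unwrapped[16];

  if (AES_set_encrypt_key(kek_bytes, 256, &wrap_key) != 0 ||
      AES_set_decrypt_key(kek_bytes, 256, &unwrap_key) != 0) {
    return 0;
  }
  // A NULL IV selects kDefaultIV (0xa6 repeated); returns the wrapped
  // length on success or -1 on error.
  if (AES_wrap_key(&wrap_key, NULL, wrapped, secret, 16) != 16 + 8) {
    return 0;
  }
  // Unwrapping verifies the IV internally; returns the plaintext length or -1.
  return AES_unwrap_key(&unwrap_key, NULL, unwrapped, wrapped,
                        sizeof(wrapped)) == 16;
}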

View File

@@ -0,0 +1,106 @@
/* ====================================================================
* Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <CBigNumBoringSSL_aes.h>
#include <assert.h>
#include "../aes/internal.h"
#include "../modes/internal.h"
void AES_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[AES_BLOCK_SIZE],
uint8_t ecount_buf[AES_BLOCK_SIZE], unsigned int *num) {
CRYPTO_ctr128_encrypt(in, out, len, key, ivec, ecount_buf, num, AES_encrypt);
}
void AES_ecb_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key,
const int enc) {
assert(in && out && key);
assert((AES_ENCRYPT == enc) || (AES_DECRYPT == enc));
if (AES_ENCRYPT == enc) {
AES_encrypt(in, out, key);
} else {
AES_decrypt(in, out, key);
}
}
void AES_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t *ivec, const int enc) {
if (hwaes_capable()) {
aes_hw_cbc_encrypt(in, out, len, key, ivec, enc);
return;
}
if (!vpaes_capable()) {
aes_nohw_cbc_encrypt(in, out, len, key, ivec, enc);
return;
}
if (enc) {
CRYPTO_cbc128_encrypt(in, out, len, key, ivec, AES_encrypt);
} else {
CRYPTO_cbc128_decrypt(in, out, len, key, ivec, AES_decrypt);
}
}
void AES_ofb128_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, uint8_t *ivec, int *num) {
unsigned num_u = (unsigned)(*num);
CRYPTO_ofb128_encrypt(in, out, length, key, ivec, &num_u, AES_encrypt);
*num = (int)num_u;
}
void AES_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, uint8_t *ivec, int *num,
int enc) {
unsigned num_u = (unsigned)(*num);
CRYPTO_cfb128_encrypt(in, out, length, key, ivec, &num_u, enc, AES_encrypt);
*num = (int)num_u;
}
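
These wrappers expose the classic one-shot CBC/CTR/OFB/CFB modes on top of the block dispatch above; AES_cbc_encrypt takes the total byte length and writes the last ciphertext block back into the IV buffer. A small sketch assuming the caller already holds a buffer whose length is a multiple of AES_BLOCK_SIZE (padding is left to the caller here); the key and IV values are illustrative only:

#include <CBigNumBoringSSL_aes.h>
#include <stdint.h>
#include <string.h>

// CBC-encrypt `len` bytes with AES-128. The IV copy is overwritten with the
// last ciphertext block, so it could be reused to continue the stream.
static int cbc_encrypt_buffer(const uint8_t key_bytes[16],
                              const uint8_t iv_in[AES_BLOCK_SIZE],
                              const uint8_t *in, uint8_t *out, size_t len) {
  AES_KEY key;
  uint8_t iv[AES_BLOCK_SIZE];

  if (len % AES_BLOCK_SIZE != 0 ||
      AES_set_encrypt_key(key_bytes, 128, &key) != 0) {
    return 0;
  }
  memcpy(iv, iv_in, sizeof(iv));  // AES_cbc_encrypt mutates the IV in place
  AES_cbc_encrypt(in, out, len, &key, iv, AES_ENCRYPT);
  return 1;
}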

View File

@@ -0,0 +1,859 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.type _aesni_ctr32_ghash_6x,@function
.align 32
_aesni_ctr32_ghash_6x:
.cfi_startproc
vmovdqu 32(%r11),%xmm2
subq $6,%rdx
vpxor %xmm4,%xmm4,%xmm4
vmovdqu 0-128(%rcx),%xmm15
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpaddb %xmm2,%xmm11,%xmm12
vpaddb %xmm2,%xmm12,%xmm13
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm15,%xmm1,%xmm9
vmovdqu %xmm4,16+8(%rsp)
jmp .Loop6x
.align 32
.Loop6x:
addl $100663296,%ebx
jc .Lhandle_ctr32
vmovdqu 0-32(%r9),%xmm3
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm15,%xmm10,%xmm10
vpxor %xmm15,%xmm11,%xmm11
.Lresume_ctr32:
vmovdqu %xmm1,(%r8)
vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
vpxor %xmm15,%xmm12,%xmm12
vmovups 16-128(%rcx),%xmm2
vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
xorq %r12,%r12
cmpq %r14,%r15
vaesenc %xmm2,%xmm9,%xmm9
vmovdqu 48+8(%rsp),%xmm0
vpxor %xmm15,%xmm13,%xmm13
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
vaesenc %xmm2,%xmm10,%xmm10
vpxor %xmm15,%xmm14,%xmm14
setnc %r12b
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vaesenc %xmm2,%xmm11,%xmm11
vmovdqu 16-32(%r9),%xmm3
negq %r12
vaesenc %xmm2,%xmm12,%xmm12
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
vpxor %xmm4,%xmm8,%xmm8
vaesenc %xmm2,%xmm13,%xmm13
vpxor %xmm5,%xmm1,%xmm4
andq $0x60,%r12
vmovups 32-128(%rcx),%xmm15
vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
vaesenc %xmm2,%xmm14,%xmm14
vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
leaq (%r14,%r12,1),%r14
vaesenc %xmm15,%xmm9,%xmm9
vpxor 16+8(%rsp),%xmm8,%xmm8
vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
vmovdqu 64+8(%rsp),%xmm0
vaesenc %xmm15,%xmm10,%xmm10
movbeq 88(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 80(%r14),%r12
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,32+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,40+8(%rsp)
vmovdqu 48-32(%r9),%xmm5
vaesenc %xmm15,%xmm14,%xmm14
vmovups 48-128(%rcx),%xmm15
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm3,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
vaesenc %xmm15,%xmm11,%xmm11
vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
vmovdqu 80+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vpxor %xmm1,%xmm4,%xmm4
vmovdqu 64-32(%r9),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vmovups 64-128(%rcx),%xmm15
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
vaesenc %xmm15,%xmm10,%xmm10
movbeq 72(%r14),%r13
vpxor %xmm5,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
movbeq 64(%r14),%r12
vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
vmovdqu 96+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,48+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,56+8(%rsp)
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 96-32(%r9),%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vmovups 80-128(%rcx),%xmm15
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
vaesenc %xmm15,%xmm10,%xmm10
movbeq 56(%r14),%r13
vpxor %xmm1,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
vpxor 112+8(%rsp),%xmm8,%xmm8
vaesenc %xmm15,%xmm11,%xmm11
movbeq 48(%r14),%r12
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,64+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,72+8(%rsp)
vpxor %xmm3,%xmm4,%xmm4
vmovdqu 112-32(%r9),%xmm3
vaesenc %xmm15,%xmm14,%xmm14
vmovups 96-128(%rcx),%xmm15
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
vaesenc %xmm15,%xmm10,%xmm10
movbeq 40(%r14),%r13
vpxor %xmm2,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
movbeq 32(%r14),%r12
vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,80+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,88+8(%rsp)
vpxor %xmm5,%xmm6,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor %xmm1,%xmm6,%xmm6
vmovups 112-128(%rcx),%xmm15
vpslldq $8,%xmm6,%xmm5
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 16(%r11),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm8,%xmm7,%xmm7
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm5,%xmm4,%xmm4
movbeq 24(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 16(%r14),%r12
vpalignr $8,%xmm4,%xmm4,%xmm0
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
movq %r13,96+8(%rsp)
vaesenc %xmm15,%xmm12,%xmm12
movq %r12,104+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
vmovups 128-128(%rcx),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vmovups 144-128(%rcx),%xmm15
vaesenc %xmm1,%xmm10,%xmm10
vpsrldq $8,%xmm6,%xmm6
vaesenc %xmm1,%xmm11,%xmm11
vpxor %xmm6,%xmm7,%xmm7
vaesenc %xmm1,%xmm12,%xmm12
vpxor %xmm0,%xmm4,%xmm4
movbeq 8(%r14),%r13
vaesenc %xmm1,%xmm13,%xmm13
movbeq 0(%r14),%r12
vaesenc %xmm1,%xmm14,%xmm14
vmovups 160-128(%rcx),%xmm1
cmpl $11,%ebp
jb .Lenc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 176-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 192-128(%rcx),%xmm1
je .Lenc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 208-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 224-128(%rcx),%xmm1
jmp .Lenc_tail
.align 32
.Lhandle_ctr32:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vmovdqu 0-32(%r9),%xmm3
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm15,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm15,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpshufb %xmm0,%xmm14,%xmm14
vpshufb %xmm0,%xmm1,%xmm1
jmp .Lresume_ctr32
.align 32
.Lenc_tail:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
vpalignr $8,%xmm4,%xmm4,%xmm8
vaesenc %xmm15,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
vpxor 0(%rdi),%xmm1,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
vpxor 16(%rdi),%xmm1,%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vpxor 32(%rdi),%xmm1,%xmm5
vaesenc %xmm15,%xmm13,%xmm13
vpxor 48(%rdi),%xmm1,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor 64(%rdi),%xmm1,%xmm7
vpxor 80(%rdi),%xmm1,%xmm3
vmovdqu (%r8),%xmm1
vaesenclast %xmm2,%xmm9,%xmm9
vmovdqu 32(%r11),%xmm2
vaesenclast %xmm0,%xmm10,%xmm10
vpaddb %xmm2,%xmm1,%xmm0
movq %r13,112+8(%rsp)
leaq 96(%rdi),%rdi
vaesenclast %xmm5,%xmm11,%xmm11
vpaddb %xmm2,%xmm0,%xmm5
movq %r12,120+8(%rsp)
leaq 96(%rsi),%rsi
vmovdqu 0-128(%rcx),%xmm15
vaesenclast %xmm6,%xmm12,%xmm12
vpaddb %xmm2,%xmm5,%xmm6
vaesenclast %xmm7,%xmm13,%xmm13
vpaddb %xmm2,%xmm6,%xmm7
vaesenclast %xmm3,%xmm14,%xmm14
vpaddb %xmm2,%xmm7,%xmm3
addq $0x60,%r10
subq $0x6,%rdx
jc .L6x_done
vmovups %xmm9,-96(%rsi)
vpxor %xmm15,%xmm1,%xmm9
vmovups %xmm10,-80(%rsi)
vmovdqa %xmm0,%xmm10
vmovups %xmm11,-64(%rsi)
vmovdqa %xmm5,%xmm11
vmovups %xmm12,-48(%rsi)
vmovdqa %xmm6,%xmm12
vmovups %xmm13,-32(%rsi)
vmovdqa %xmm7,%xmm13
vmovups %xmm14,-16(%rsi)
vmovdqa %xmm3,%xmm14
vmovdqu 32+8(%rsp),%xmm7
jmp .Loop6x
.L6x_done:
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
.byte 0xf3,0xc3
.cfi_endproc
.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
.globl aesni_gcm_decrypt
.hidden aesni_gcm_decrypt
.type aesni_gcm_decrypt,@function
.align 32
aesni_gcm_decrypt:
.cfi_startproc
xorq %r10,%r10
cmpq $0x60,%rdx
jb .Lgcm_dec_abort
leaq (%rsp),%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq .Lbswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
vmovdqu (%r9),%xmm8
andq $-128,%rsp
vmovdqu (%r11),%xmm0
leaq 128(%rcx),%rcx
leaq 32+32(%r9),%r9
movl 240-128(%rcx),%ebp
vpshufb %xmm0,%xmm8,%xmm8
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc .Ldec_no_key_aliasing
cmpq $768,%r15
jnc .Ldec_no_key_aliasing
subq %r15,%rsp
.Ldec_no_key_aliasing:
vmovdqu 80(%rdi),%xmm7
leaq (%rdi),%r14
vmovdqu 64(%rdi),%xmm4
leaq -192(%rdi,%rdx,1),%r15
vmovdqu 48(%rdi),%xmm5
shrq $4,%rdx
xorq %r10,%r10
vmovdqu 32(%rdi),%xmm6
vpshufb %xmm0,%xmm7,%xmm7
vmovdqu 16(%rdi),%xmm2
vpshufb %xmm0,%xmm4,%xmm4
vmovdqu (%rdi),%xmm3
vpshufb %xmm0,%xmm5,%xmm5
vmovdqu %xmm4,48(%rsp)
vpshufb %xmm0,%xmm6,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm2,%xmm2
vmovdqu %xmm6,80(%rsp)
vpshufb %xmm0,%xmm3,%xmm3
vmovdqu %xmm2,96(%rsp)
vmovdqu %xmm3,112(%rsp)
call _aesni_ctr32_ghash_6x
vmovups %xmm9,-96(%rsi)
vmovups %xmm10,-80(%rsi)
vmovups %xmm11,-64(%rsi)
vmovups %xmm12,-48(%rsi)
vmovups %xmm13,-32(%rsi)
vmovups %xmm14,-16(%rsi)
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lgcm_dec_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
.type _aesni_ctr32_6x,@function
.align 32
_aesni_ctr32_6x:
.cfi_startproc
vmovdqu 0-128(%rcx),%xmm4
vmovdqu 32(%r11),%xmm2
leaq -1(%rbp),%r13
vmovups 16-128(%rcx),%xmm15
leaq 32-128(%rcx),%r12
vpxor %xmm4,%xmm1,%xmm9
addl $100663296,%ebx
jc .Lhandle_ctr32_2
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddb %xmm2,%xmm11,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddb %xmm2,%xmm12,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.align 16
.Loop_ctr32:
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vmovups (%r12),%xmm15
leaq 16(%r12),%r12
decl %r13d
jnz .Loop_ctr32
vmovdqu (%r12),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor 0(%rdi),%xmm3,%xmm4
vaesenc %xmm15,%xmm10,%xmm10
vpxor 16(%rdi),%xmm3,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
vpxor 32(%rdi),%xmm3,%xmm6
vaesenc %xmm15,%xmm12,%xmm12
vpxor 48(%rdi),%xmm3,%xmm8
vaesenc %xmm15,%xmm13,%xmm13
vpxor 64(%rdi),%xmm3,%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vpxor 80(%rdi),%xmm3,%xmm3
leaq 96(%rdi),%rdi
vaesenclast %xmm4,%xmm9,%xmm9
vaesenclast %xmm5,%xmm10,%xmm10
vaesenclast %xmm6,%xmm11,%xmm11
vaesenclast %xmm8,%xmm12,%xmm12
vaesenclast %xmm2,%xmm13,%xmm13
vaesenclast %xmm3,%xmm14,%xmm14
vmovups %xmm9,0(%rsi)
vmovups %xmm10,16(%rsi)
vmovups %xmm11,32(%rsi)
vmovups %xmm12,48(%rsi)
vmovups %xmm13,64(%rsi)
vmovups %xmm14,80(%rsi)
leaq 96(%rsi),%rsi
.byte 0xf3,0xc3
.align 32
.Lhandle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpshufb %xmm0,%xmm14,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpshufb %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.cfi_endproc
.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
.globl aesni_gcm_encrypt
.hidden aesni_gcm_encrypt
.type aesni_gcm_encrypt,@function
.align 32
aesni_gcm_encrypt:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+2(%rip)
#endif
xorq %r10,%r10
cmpq $288,%rdx
jb .Lgcm_enc_abort
leaq (%rsp),%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq .Lbswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
leaq 128(%rcx),%rcx
vmovdqu (%r11),%xmm0
andq $-128,%rsp
movl 240-128(%rcx),%ebp
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc .Lenc_no_key_aliasing
cmpq $768,%r15
jnc .Lenc_no_key_aliasing
subq %r15,%rsp
.Lenc_no_key_aliasing:
leaq (%rsi),%r14
leaq -192(%rsi,%rdx,1),%r15
shrq $4,%rdx
call _aesni_ctr32_6x
vpshufb %xmm0,%xmm9,%xmm8
vpshufb %xmm0,%xmm10,%xmm2
vmovdqu %xmm8,112(%rsp)
vpshufb %xmm0,%xmm11,%xmm4
vmovdqu %xmm2,96(%rsp)
vpshufb %xmm0,%xmm12,%xmm5
vmovdqu %xmm4,80(%rsp)
vpshufb %xmm0,%xmm13,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm14,%xmm7
vmovdqu %xmm6,48(%rsp)
call _aesni_ctr32_6x
vmovdqu (%r9),%xmm8
leaq 32+32(%r9),%r9
subq $12,%rdx
movq $192,%r10
vpshufb %xmm0,%xmm8,%xmm8
call _aesni_ctr32_ghash_6x
vmovdqu 32(%rsp),%xmm7
vmovdqu (%r11),%xmm0
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm7,%xmm7,%xmm1
vmovdqu 32-32(%r9),%xmm15
vmovups %xmm9,-96(%rsi)
vpshufb %xmm0,%xmm9,%xmm9
vpxor %xmm7,%xmm1,%xmm1
vmovups %xmm10,-80(%rsi)
vpshufb %xmm0,%xmm10,%xmm10
vmovups %xmm11,-64(%rsi)
vpshufb %xmm0,%xmm11,%xmm11
vmovups %xmm12,-48(%rsi)
vpshufb %xmm0,%xmm12,%xmm12
vmovups %xmm13,-32(%rsi)
vpshufb %xmm0,%xmm13,%xmm13
vmovups %xmm14,-16(%rsi)
vpshufb %xmm0,%xmm14,%xmm14
vmovdqu %xmm9,16(%rsp)
vmovdqu 48(%rsp),%xmm6
vmovdqu 16-32(%r9),%xmm0
vpunpckhqdq %xmm6,%xmm6,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
vpxor %xmm6,%xmm2,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vmovdqu 64(%rsp),%xmm9
vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm9,%xmm9,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
vpxor %xmm9,%xmm5,%xmm5
vpxor %xmm7,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vmovdqu 80(%rsp),%xmm1
vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm4,%xmm7,%xmm7
vpunpckhqdq %xmm1,%xmm1,%xmm4
vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpxor %xmm6,%xmm9,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 96(%rsp),%xmm2
vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm7,%xmm6,%xmm6
vpunpckhqdq %xmm2,%xmm2,%xmm7
vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
vpxor %xmm2,%xmm7,%xmm7
vpxor %xmm9,%xmm1,%xmm1
vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm5,%xmm4,%xmm4
vpxor 112(%rsp),%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
vmovdqu 112-32(%r9),%xmm0
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpxor %xmm6,%xmm5,%xmm5
vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
vpxor %xmm4,%xmm7,%xmm4
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm1
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
vpxor %xmm14,%xmm1,%xmm1
vpxor %xmm5,%xmm6,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
vmovdqu 32-32(%r9),%xmm15
vpxor %xmm2,%xmm8,%xmm7
vpxor %xmm4,%xmm9,%xmm6
vmovdqu 16-32(%r9),%xmm0
vpxor %xmm5,%xmm7,%xmm9
vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
vpxor %xmm9,%xmm6,%xmm6
vpunpckhqdq %xmm13,%xmm13,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
vpxor %xmm13,%xmm2,%xmm2
vpslldq $8,%xmm6,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vpxor %xmm9,%xmm5,%xmm8
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm6,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm12,%xmm12,%xmm9
vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
vpxor %xmm12,%xmm9,%xmm9
vpxor %xmm14,%xmm13,%xmm13
vpalignr $8,%xmm8,%xmm8,%xmm14
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm11,%xmm11,%xmm1
vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
vpxor %xmm11,%xmm1,%xmm1
vpxor %xmm13,%xmm12,%xmm12
vxorps 16(%rsp),%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm9,%xmm9
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm10,%xmm10,%xmm2
vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
vpxor %xmm10,%xmm2,%xmm2
vpalignr $8,%xmm8,%xmm8,%xmm14
vpxor %xmm12,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm9,%xmm1,%xmm1
vxorps %xmm7,%xmm14,%xmm14
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
vmovdqu 112-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm11,%xmm10,%xmm10
vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
vpxor %xmm4,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
vpxor %xmm10,%xmm7,%xmm7
vpxor %xmm2,%xmm6,%xmm6
vpxor %xmm5,%xmm7,%xmm4
vpxor %xmm4,%xmm6,%xmm6
vpslldq $8,%xmm6,%xmm1
vmovdqu 16(%r11),%xmm3
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm1,%xmm5,%xmm8
vpxor %xmm6,%xmm7,%xmm7
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm2,%xmm8,%xmm8
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm7,%xmm2,%xmm2
vpxor %xmm2,%xmm8,%xmm8
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lgcm_enc_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.Lone_msb:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Ltwo_lsb:
.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.Lone_lsb:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits
#endif // defined(__x86_64__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,857 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.p2align 5
_aesni_ctr32_ghash_6x:
vmovdqu 32(%r11),%xmm2
subq $6,%rdx
vpxor %xmm4,%xmm4,%xmm4
vmovdqu 0-128(%rcx),%xmm15
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpaddb %xmm2,%xmm11,%xmm12
vpaddb %xmm2,%xmm12,%xmm13
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm15,%xmm1,%xmm9
vmovdqu %xmm4,16+8(%rsp)
jmp L$oop6x
.p2align 5
L$oop6x:
addl $100663296,%ebx
jc L$handle_ctr32
vmovdqu 0-32(%r9),%xmm3
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm15,%xmm10,%xmm10
vpxor %xmm15,%xmm11,%xmm11
L$resume_ctr32:
vmovdqu %xmm1,(%r8)
vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
vpxor %xmm15,%xmm12,%xmm12
vmovups 16-128(%rcx),%xmm2
vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
xorq %r12,%r12
cmpq %r14,%r15
vaesenc %xmm2,%xmm9,%xmm9
vmovdqu 48+8(%rsp),%xmm0
vpxor %xmm15,%xmm13,%xmm13
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
vaesenc %xmm2,%xmm10,%xmm10
vpxor %xmm15,%xmm14,%xmm14
setnc %r12b
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vaesenc %xmm2,%xmm11,%xmm11
vmovdqu 16-32(%r9),%xmm3
negq %r12
vaesenc %xmm2,%xmm12,%xmm12
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
vpxor %xmm4,%xmm8,%xmm8
vaesenc %xmm2,%xmm13,%xmm13
vpxor %xmm5,%xmm1,%xmm4
andq $0x60,%r12
vmovups 32-128(%rcx),%xmm15
vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
vaesenc %xmm2,%xmm14,%xmm14
vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
leaq (%r14,%r12,1),%r14
vaesenc %xmm15,%xmm9,%xmm9
vpxor 16+8(%rsp),%xmm8,%xmm8
vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
vmovdqu 64+8(%rsp),%xmm0
vaesenc %xmm15,%xmm10,%xmm10
movbeq 88(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 80(%r14),%r12
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,32+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,40+8(%rsp)
vmovdqu 48-32(%r9),%xmm5
vaesenc %xmm15,%xmm14,%xmm14
vmovups 48-128(%rcx),%xmm15
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm3,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
vaesenc %xmm15,%xmm11,%xmm11
vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
vmovdqu 80+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vpxor %xmm1,%xmm4,%xmm4
vmovdqu 64-32(%r9),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vmovups 64-128(%rcx),%xmm15
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
vaesenc %xmm15,%xmm10,%xmm10
movbeq 72(%r14),%r13
vpxor %xmm5,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
movbeq 64(%r14),%r12
vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
vmovdqu 96+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,48+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,56+8(%rsp)
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 96-32(%r9),%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vmovups 80-128(%rcx),%xmm15
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
vaesenc %xmm15,%xmm10,%xmm10
movbeq 56(%r14),%r13
vpxor %xmm1,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
vpxor 112+8(%rsp),%xmm8,%xmm8
vaesenc %xmm15,%xmm11,%xmm11
movbeq 48(%r14),%r12
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,64+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,72+8(%rsp)
vpxor %xmm3,%xmm4,%xmm4
vmovdqu 112-32(%r9),%xmm3
vaesenc %xmm15,%xmm14,%xmm14
vmovups 96-128(%rcx),%xmm15
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
vaesenc %xmm15,%xmm10,%xmm10
movbeq 40(%r14),%r13
vpxor %xmm2,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
movbeq 32(%r14),%r12
vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,80+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,88+8(%rsp)
vpxor %xmm5,%xmm6,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor %xmm1,%xmm6,%xmm6
vmovups 112-128(%rcx),%xmm15
vpslldq $8,%xmm6,%xmm5
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 16(%r11),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm8,%xmm7,%xmm7
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm5,%xmm4,%xmm4
movbeq 24(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 16(%r14),%r12
vpalignr $8,%xmm4,%xmm4,%xmm0
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
movq %r13,96+8(%rsp)
vaesenc %xmm15,%xmm12,%xmm12
movq %r12,104+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
vmovups 128-128(%rcx),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vmovups 144-128(%rcx),%xmm15
vaesenc %xmm1,%xmm10,%xmm10
vpsrldq $8,%xmm6,%xmm6
vaesenc %xmm1,%xmm11,%xmm11
vpxor %xmm6,%xmm7,%xmm7
vaesenc %xmm1,%xmm12,%xmm12
vpxor %xmm0,%xmm4,%xmm4
movbeq 8(%r14),%r13
vaesenc %xmm1,%xmm13,%xmm13
movbeq 0(%r14),%r12
vaesenc %xmm1,%xmm14,%xmm14
vmovups 160-128(%rcx),%xmm1
cmpl $11,%ebp
jb L$enc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 176-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 192-128(%rcx),%xmm1
je L$enc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 208-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 224-128(%rcx),%xmm1
jmp L$enc_tail
.p2align 5
L$handle_ctr32:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vmovdqu 0-32(%r9),%xmm3
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm15,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm15,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpshufb %xmm0,%xmm14,%xmm14
vpshufb %xmm0,%xmm1,%xmm1
jmp L$resume_ctr32
.p2align 5
L$enc_tail:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
vpalignr $8,%xmm4,%xmm4,%xmm8
vaesenc %xmm15,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
vpxor 0(%rdi),%xmm1,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
vpxor 16(%rdi),%xmm1,%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vpxor 32(%rdi),%xmm1,%xmm5
vaesenc %xmm15,%xmm13,%xmm13
vpxor 48(%rdi),%xmm1,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor 64(%rdi),%xmm1,%xmm7
vpxor 80(%rdi),%xmm1,%xmm3
vmovdqu (%r8),%xmm1
vaesenclast %xmm2,%xmm9,%xmm9
vmovdqu 32(%r11),%xmm2
vaesenclast %xmm0,%xmm10,%xmm10
vpaddb %xmm2,%xmm1,%xmm0
movq %r13,112+8(%rsp)
leaq 96(%rdi),%rdi
vaesenclast %xmm5,%xmm11,%xmm11
vpaddb %xmm2,%xmm0,%xmm5
movq %r12,120+8(%rsp)
leaq 96(%rsi),%rsi
vmovdqu 0-128(%rcx),%xmm15
vaesenclast %xmm6,%xmm12,%xmm12
vpaddb %xmm2,%xmm5,%xmm6
vaesenclast %xmm7,%xmm13,%xmm13
vpaddb %xmm2,%xmm6,%xmm7
vaesenclast %xmm3,%xmm14,%xmm14
vpaddb %xmm2,%xmm7,%xmm3
addq $0x60,%r10
subq $0x6,%rdx
jc L$6x_done
vmovups %xmm9,-96(%rsi)
vpxor %xmm15,%xmm1,%xmm9
vmovups %xmm10,-80(%rsi)
vmovdqa %xmm0,%xmm10
vmovups %xmm11,-64(%rsi)
vmovdqa %xmm5,%xmm11
vmovups %xmm12,-48(%rsi)
vmovdqa %xmm6,%xmm12
vmovups %xmm13,-32(%rsi)
vmovdqa %xmm7,%xmm13
vmovups %xmm14,-16(%rsi)
vmovdqa %xmm3,%xmm14
vmovdqu 32+8(%rsp),%xmm7
jmp L$oop6x
L$6x_done:
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
.byte 0xf3,0xc3
.globl _aesni_gcm_decrypt
.private_extern _aesni_gcm_decrypt
.p2align 5
_aesni_gcm_decrypt:
xorq %r10,%r10
cmpq $0x60,%rdx
jb L$gcm_dec_abort
leaq (%rsp),%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq L$bswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
vmovdqu (%r9),%xmm8
andq $-128,%rsp
vmovdqu (%r11),%xmm0
leaq 128(%rcx),%rcx
leaq 32+32(%r9),%r9
movl 240-128(%rcx),%ebp
vpshufb %xmm0,%xmm8,%xmm8
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc L$dec_no_key_aliasing
cmpq $768,%r15
jnc L$dec_no_key_aliasing
subq %r15,%rsp
L$dec_no_key_aliasing:
vmovdqu 80(%rdi),%xmm7
leaq (%rdi),%r14
vmovdqu 64(%rdi),%xmm4
leaq -192(%rdi,%rdx,1),%r15
vmovdqu 48(%rdi),%xmm5
shrq $4,%rdx
xorq %r10,%r10
vmovdqu 32(%rdi),%xmm6
vpshufb %xmm0,%xmm7,%xmm7
vmovdqu 16(%rdi),%xmm2
vpshufb %xmm0,%xmm4,%xmm4
vmovdqu (%rdi),%xmm3
vpshufb %xmm0,%xmm5,%xmm5
vmovdqu %xmm4,48(%rsp)
vpshufb %xmm0,%xmm6,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm2,%xmm2
vmovdqu %xmm6,80(%rsp)
vpshufb %xmm0,%xmm3,%xmm3
vmovdqu %xmm2,96(%rsp)
vmovdqu %xmm3,112(%rsp)
call _aesni_ctr32_ghash_6x
vmovups %xmm9,-96(%rsi)
vmovups %xmm10,-80(%rsi)
vmovups %xmm11,-64(%rsi)
vmovups %xmm12,-48(%rsi)
vmovups %xmm13,-32(%rsi)
vmovups %xmm14,-16(%rsi)
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
L$gcm_dec_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.p2align 5
_aesni_ctr32_6x:
vmovdqu 0-128(%rcx),%xmm4
vmovdqu 32(%r11),%xmm2
leaq -1(%rbp),%r13
vmovups 16-128(%rcx),%xmm15
leaq 32-128(%rcx),%r12
vpxor %xmm4,%xmm1,%xmm9
addl $100663296,%ebx
jc L$handle_ctr32_2
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddb %xmm2,%xmm11,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddb %xmm2,%xmm12,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp L$oop_ctr32
.p2align 4
L$oop_ctr32:
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vmovups (%r12),%xmm15
leaq 16(%r12),%r12
decl %r13d
jnz L$oop_ctr32
vmovdqu (%r12),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor 0(%rdi),%xmm3,%xmm4
vaesenc %xmm15,%xmm10,%xmm10
vpxor 16(%rdi),%xmm3,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
vpxor 32(%rdi),%xmm3,%xmm6
vaesenc %xmm15,%xmm12,%xmm12
vpxor 48(%rdi),%xmm3,%xmm8
vaesenc %xmm15,%xmm13,%xmm13
vpxor 64(%rdi),%xmm3,%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vpxor 80(%rdi),%xmm3,%xmm3
leaq 96(%rdi),%rdi
vaesenclast %xmm4,%xmm9,%xmm9
vaesenclast %xmm5,%xmm10,%xmm10
vaesenclast %xmm6,%xmm11,%xmm11
vaesenclast %xmm8,%xmm12,%xmm12
vaesenclast %xmm2,%xmm13,%xmm13
vaesenclast %xmm3,%xmm14,%xmm14
vmovups %xmm9,0(%rsi)
vmovups %xmm10,16(%rsi)
vmovups %xmm11,32(%rsi)
vmovups %xmm12,48(%rsi)
vmovups %xmm13,64(%rsi)
vmovups %xmm14,80(%rsi)
leaq 96(%rsi),%rsi
.byte 0xf3,0xc3
.p2align 5
L$handle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpshufb %xmm0,%xmm14,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpshufb %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp L$oop_ctr32
.globl _aesni_gcm_encrypt
.private_extern _aesni_gcm_encrypt
.p2align 5
_aesni_gcm_encrypt:
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit+2(%rip)
#endif
xorq %r10,%r10
cmpq $288,%rdx
jb L$gcm_enc_abort
leaq (%rsp),%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq L$bswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
leaq 128(%rcx),%rcx
vmovdqu (%r11),%xmm0
andq $-128,%rsp
movl 240-128(%rcx),%ebp
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc L$enc_no_key_aliasing
cmpq $768,%r15
jnc L$enc_no_key_aliasing
subq %r15,%rsp
L$enc_no_key_aliasing:
leaq (%rsi),%r14
leaq -192(%rsi,%rdx,1),%r15
shrq $4,%rdx
call _aesni_ctr32_6x
vpshufb %xmm0,%xmm9,%xmm8
vpshufb %xmm0,%xmm10,%xmm2
vmovdqu %xmm8,112(%rsp)
vpshufb %xmm0,%xmm11,%xmm4
vmovdqu %xmm2,96(%rsp)
vpshufb %xmm0,%xmm12,%xmm5
vmovdqu %xmm4,80(%rsp)
vpshufb %xmm0,%xmm13,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm14,%xmm7
vmovdqu %xmm6,48(%rsp)
call _aesni_ctr32_6x
vmovdqu (%r9),%xmm8
leaq 32+32(%r9),%r9
subq $12,%rdx
movq $192,%r10
vpshufb %xmm0,%xmm8,%xmm8
call _aesni_ctr32_ghash_6x
vmovdqu 32(%rsp),%xmm7
vmovdqu (%r11),%xmm0
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm7,%xmm7,%xmm1
vmovdqu 32-32(%r9),%xmm15
vmovups %xmm9,-96(%rsi)
vpshufb %xmm0,%xmm9,%xmm9
vpxor %xmm7,%xmm1,%xmm1
vmovups %xmm10,-80(%rsi)
vpshufb %xmm0,%xmm10,%xmm10
vmovups %xmm11,-64(%rsi)
vpshufb %xmm0,%xmm11,%xmm11
vmovups %xmm12,-48(%rsi)
vpshufb %xmm0,%xmm12,%xmm12
vmovups %xmm13,-32(%rsi)
vpshufb %xmm0,%xmm13,%xmm13
vmovups %xmm14,-16(%rsi)
vpshufb %xmm0,%xmm14,%xmm14
vmovdqu %xmm9,16(%rsp)
vmovdqu 48(%rsp),%xmm6
vmovdqu 16-32(%r9),%xmm0
vpunpckhqdq %xmm6,%xmm6,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
vpxor %xmm6,%xmm2,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vmovdqu 64(%rsp),%xmm9
vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm9,%xmm9,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
vpxor %xmm9,%xmm5,%xmm5
vpxor %xmm7,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vmovdqu 80(%rsp),%xmm1
vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm4,%xmm7,%xmm7
vpunpckhqdq %xmm1,%xmm1,%xmm4
vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpxor %xmm6,%xmm9,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 96(%rsp),%xmm2
vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm7,%xmm6,%xmm6
vpunpckhqdq %xmm2,%xmm2,%xmm7
vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
vpxor %xmm2,%xmm7,%xmm7
vpxor %xmm9,%xmm1,%xmm1
vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm5,%xmm4,%xmm4
vpxor 112(%rsp),%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
vmovdqu 112-32(%r9),%xmm0
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpxor %xmm6,%xmm5,%xmm5
vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
vpxor %xmm4,%xmm7,%xmm4
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm1
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
vpxor %xmm14,%xmm1,%xmm1
vpxor %xmm5,%xmm6,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
vmovdqu 32-32(%r9),%xmm15
vpxor %xmm2,%xmm8,%xmm7
vpxor %xmm4,%xmm9,%xmm6
vmovdqu 16-32(%r9),%xmm0
vpxor %xmm5,%xmm7,%xmm9
vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
vpxor %xmm9,%xmm6,%xmm6
vpunpckhqdq %xmm13,%xmm13,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
vpxor %xmm13,%xmm2,%xmm2
vpslldq $8,%xmm6,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vpxor %xmm9,%xmm5,%xmm8
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm6,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm12,%xmm12,%xmm9
vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
vpxor %xmm12,%xmm9,%xmm9
vpxor %xmm14,%xmm13,%xmm13
vpalignr $8,%xmm8,%xmm8,%xmm14
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm11,%xmm11,%xmm1
vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
vpxor %xmm11,%xmm1,%xmm1
vpxor %xmm13,%xmm12,%xmm12
vxorps 16(%rsp),%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm9,%xmm9
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm10,%xmm10,%xmm2
vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
vpxor %xmm10,%xmm2,%xmm2
vpalignr $8,%xmm8,%xmm8,%xmm14
vpxor %xmm12,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm9,%xmm1,%xmm1
vxorps %xmm7,%xmm14,%xmm14
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
vmovdqu 112-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm11,%xmm10,%xmm10
vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
vpxor %xmm4,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
vpxor %xmm10,%xmm7,%xmm7
vpxor %xmm2,%xmm6,%xmm6
vpxor %xmm5,%xmm7,%xmm4
vpxor %xmm4,%xmm6,%xmm6
vpslldq $8,%xmm6,%xmm1
vmovdqu 16(%r11),%xmm3
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm1,%xmm5,%xmm8
vpxor %xmm6,%xmm7,%xmm7
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm2,%xmm8,%xmm8
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm7,%xmm2,%xmm2
vpxor %xmm2,%xmm8,%xmm8
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
L$gcm_enc_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.p2align 6
L$bswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
L$poly:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
L$one_msb:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
L$two_lsb:
.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
L$one_lsb:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
#endif
#endif // defined(__x86_64__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,797 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__APPLE__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.code 32
#undef __thumb2__
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key
#ifdef __thumb2__
.thumb_func _aes_hw_set_encrypt_key
#endif
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
mov r3,#-1
cmp r0,#0
beq Lenc_key_abort
cmp r2,#0
beq Lenc_key_abort
mov r3,#-2
cmp r1,#128
blt Lenc_key_abort
cmp r1,#256
bgt Lenc_key_abort
tst r1,#0x3f
bne Lenc_key_abort
adr r3,Lrcon
cmp r1,#192
veor q0,q0,q0
vld1.8 {q3},[r0]!
mov r1,#8 @ reuse r1
vld1.32 {q1,q2},[r3]!
blt Loop128
beq L192
b L256
.align 4
Loop128:
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
bne Loop128
vld1.32 {q1},[r3]
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
veor q3,q3,q10
vst1.32 {q3},[r2]
add r2,r2,#0x50
mov r12,#10
b Ldone
.align 4
L192:
vld1.8 {d16},[r0]!
vmov.i8 q10,#8 @ borrow q10
vst1.32 {q3},[r2]!
vsub.i8 q2,q2,q10 @ adjust the mask
Loop192:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {d16},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vdup.32 q9,d7[1]
veor q9,q9,q8
veor q10,q10,q1
vext.8 q8,q0,q8,#12
vshl.u8 q1,q1,#1
veor q8,q8,q9
veor q3,q3,q10
veor q8,q8,q10
vst1.32 {q3},[r2]!
bne Loop192
mov r12,#12
add r2,r2,#0x20
b Ldone
.align 4
L256:
vld1.8 {q8},[r0]
mov r1,#7
mov r12,#14
vst1.32 {q3},[r2]!
Loop256:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {q8},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vst1.32 {q3},[r2]!
beq Ldone
vdup.32 q10,d7[1]
vext.8 q9,q0,q8,#12
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
veor q8,q8,q10
b Loop256
Ldone:
str r12,[r2]
mov r3,#0
Lenc_key_abort:
mov r0,r3 @ return value
bx lr
.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key
#ifdef __thumb2__
.thumb_func _aes_hw_set_decrypt_key
#endif
.align 5
_aes_hw_set_decrypt_key:
stmdb sp!,{r4,lr}
bl Lenc_key
cmp r0,#0
bne Ldec_key_abort
sub r2,r2,#240 @ restore original r2
mov r4,#-16
add r0,r2,r12,lsl#4 @ end of key schedule
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
Loop_imc:
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
cmp r0,r2
bhi Loop_imc
vld1.32 {q0},[r2]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
vst1.32 {q0},[r0]
eor r0,r0,r0 @ return value
Ldec_key_abort:
ldmia sp!,{r4,pc}
.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt
#ifdef __thumb2__
.thumb_func _aes_hw_encrypt
#endif
.align 5
_aes_hw_encrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
Loop_enc:
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q1},[r2]!
bgt Loop_enc
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.globl _aes_hw_decrypt
.private_extern _aes_hw_decrypt
#ifdef __thumb2__
.thumb_func _aes_hw_decrypt
#endif
.align 5
_aes_hw_decrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
Loop_dec:
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q1},[r2]!
bgt Loop_dec
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt
#ifdef __thumb2__
.thumb_func _aes_hw_cbc_encrypt
#endif
.align 5
_aes_hw_cbc_encrypt:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load remaining args
subs r2,r2,#16
mov r8,#16
blo Lcbc_abort
moveq r8,#0
cmp r5,#0 @ en- or decrypting?
ldr r5,[r3,#240]
and r2,r2,#-16
vld1.8 {q6},[r4]
vld1.8 {q0},[r0],r8
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#6
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
sub r5,r5,#2
vld1.32 {q10,q11},[r7]!
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
beq Lcbc_dec
cmp r5,#2
veor q0,q0,q6
veor q5,q8,q7
beq Lcbc_enc128
vld1.32 {q2,q3},[r7]
add r7,r3,#16
add r6,r3,#16*4
add r12,r3,#16*5
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
add r14,r3,#16*6
add r3,r3,#16*7
b Lenter_cbc_enc
.align 4
Loop_cbc_enc:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
Lenter_cbc_enc:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r6]
cmp r5,#4
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r12]
beq Lcbc_enc192
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r14]
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r3]
nop
Lcbc_enc192:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs Loop_cbc_enc
vst1.8 {q6},[r1]!
b Lcbc_done
.align 5
Lcbc_enc128:
vld1.32 {q2,q3},[r7]
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
b Lenter_cbc_enc128
Loop_cbc_enc128:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
Lenter_cbc_enc128:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs Loop_cbc_enc128
vst1.8 {q6},[r1]!
b Lcbc_done
.align 5
Lcbc_dec:
vld1.8 {q10},[r0]!
subs r2,r2,#32 @ bias
add r6,r5,#2
vorr q3,q0,q0
vorr q1,q0,q0
vorr q11,q10,q10
blo Lcbc_dec_tail
vorr q1,q10,q10
vld1.8 {q10},[r0]!
vorr q2,q0,q0
vorr q3,q1,q1
vorr q11,q10,q10
Loop3x_cbc_dec:
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt Loop3x_cbc_dec
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q4,q6,q7
subs r2,r2,#0x30
veor q5,q2,q7
movlo r6,r2 @ r6, r6, is zero at this point
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
add r0,r0,r6 @ r0 is adjusted in such way that
@ at exit from the loop q1-q10
@ are loaded with last "words"
vorr q6,q11,q11
mov r7,r3
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q2},[r0]!
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q3},[r0]!
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q11},[r0]!
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
add r6,r5,#2
veor q4,q4,q0
veor q5,q5,q1
veor q10,q10,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q4},[r1]!
vorr q0,q2,q2
vst1.8 {q5},[r1]!
vorr q1,q3,q3
vst1.8 {q10},[r1]!
vorr q10,q11,q11
bhs Loop3x_cbc_dec
cmn r2,#0x30
beq Lcbc_done
nop
Lcbc_dec_tail:
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt Lcbc_dec_tail
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
cmn r2,#0x20
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q5,q6,q7
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
beq Lcbc_dec_one
veor q5,q5,q1
veor q9,q9,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
vst1.8 {q9},[r1]!
b Lcbc_done
Lcbc_dec_one:
veor q5,q5,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
Lcbc_done:
vst1.8 {q6},[r4]
Lcbc_abort:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,pc}
.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks
#ifdef __thumb2__
.thumb_func _aes_hw_ctr32_encrypt_blocks
#endif
.align 5
_aes_hw_ctr32_encrypt_blocks:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldr r4, [ip] @ load remaining arg
ldr r5,[r3,#240]
ldr r8, [r4, #12]
vld1.32 {q0},[r4]
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#4
mov r12,#16
cmp r2,#2
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
sub r5,r5,#2
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
movlo r12,#0
#ifndef __ARMEB__
rev r8, r8
#endif
vorr q1,q0,q0
add r10, r8, #1
vorr q10,q0,q0
add r8, r8, #2
vorr q6,q0,q0
rev r10, r10
vmov.32 d3[1],r10
bls Lctr32_tail
rev r12, r8
sub r2,r2,#3 @ bias
vmov.32 d21[1],r12
b Loop3x_ctr32
.align 4
Loop3x_ctr32:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q9},[r7]!
bgt Loop3x_ctr32
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
vld1.8 {q2},[r0]!
vorr q0,q6,q6
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.8 {q3},[r0]!
vorr q1,q6,q6
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vld1.8 {q11},[r0]!
mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
vorr q10,q6,q6
add r9,r8,#1
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q2,q2,q7
add r10,r8,#2
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
veor q3,q3,q7
add r8,r8,#3
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q11,q11,q7
rev r9,r9
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d1[1], r9
rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vmov.32 d3[1], r10
rev r12,r8
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d21[1], r12
subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
veor q2,q2,q4
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
vst1.8 {q2},[r1]!
veor q3,q3,q5
mov r6,r5
vst1.8 {q3},[r1]!
veor q11,q11,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q11},[r1]!
bhs Loop3x_ctr32
adds r2,r2,#3
beq Lctr32_done
cmp r2,#1
mov r12,#16
moveq r12,#0
Lctr32_tail:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q9},[r7]!
bgt Lctr32_tail
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q2},[r0],r12
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q3},[r0]
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q2,q2,q7
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q3,q3,q7
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
cmp r2,#1
veor q2,q2,q0
veor q3,q3,q1
vst1.8 {q2},[r1]!
beq Lctr32_done
vst1.8 {q3},[r1]
Lctr32_done:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
#endif
#endif // !OPENSSL_NO_ASM
#endif // defined(__arm__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,788 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__linux__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.arch armv7-a @ don't confuse not-so-latest binutils with argv8 :-)
.fpu neon
.code 32
#undef __thumb2__
.align 5
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function
.align 5
aes_hw_set_encrypt_key:
.Lenc_key:
mov r3,#-1
cmp r0,#0
beq .Lenc_key_abort
cmp r2,#0
beq .Lenc_key_abort
mov r3,#-2
cmp r1,#128
blt .Lenc_key_abort
cmp r1,#256
bgt .Lenc_key_abort
tst r1,#0x3f
bne .Lenc_key_abort
adr r3,.Lrcon
cmp r1,#192
veor q0,q0,q0
vld1.8 {q3},[r0]!
mov r1,#8 @ reuse r1
vld1.32 {q1,q2},[r3]!
blt .Loop128
beq .L192
b .L256
.align 4
.Loop128:
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
bne .Loop128
vld1.32 {q1},[r3]
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
veor q3,q3,q10
vst1.32 {q3},[r2]
add r2,r2,#0x50
mov r12,#10
b .Ldone
.align 4
.L192:
vld1.8 {d16},[r0]!
vmov.i8 q10,#8 @ borrow q10
vst1.32 {q3},[r2]!
vsub.i8 q2,q2,q10 @ adjust the mask
.Loop192:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {d16},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vdup.32 q9,d7[1]
veor q9,q9,q8
veor q10,q10,q1
vext.8 q8,q0,q8,#12
vshl.u8 q1,q1,#1
veor q8,q8,q9
veor q3,q3,q10
veor q8,q8,q10
vst1.32 {q3},[r2]!
bne .Loop192
mov r12,#12
add r2,r2,#0x20
b .Ldone
.align 4
.L256:
vld1.8 {q8},[r0]
mov r1,#7
mov r12,#14
vst1.32 {q3},[r2]!
.Loop256:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {q8},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vst1.32 {q3},[r2]!
beq .Ldone
vdup.32 q10,d7[1]
vext.8 q9,q0,q8,#12
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
veor q8,q8,q10
b .Loop256
.Ldone:
str r12,[r2]
mov r3,#0
.Lenc_key_abort:
mov r0,r3 @ return value
bx lr
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.globl aes_hw_set_decrypt_key
.hidden aes_hw_set_decrypt_key
.type aes_hw_set_decrypt_key,%function
.align 5
aes_hw_set_decrypt_key:
stmdb sp!,{r4,lr}
bl .Lenc_key
cmp r0,#0
bne .Ldec_key_abort
sub r2,r2,#240 @ restore original r2
mov r4,#-16
add r0,r2,r12,lsl#4 @ end of key schedule
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
.Loop_imc:
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
cmp r0,r2
bhi .Loop_imc
vld1.32 {q0},[r2]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
vst1.32 {q0},[r0]
eor r0,r0,r0 @ return value
.Ldec_key_abort:
ldmia sp!,{r4,pc}
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_hw_encrypt
.hidden aes_hw_encrypt
.type aes_hw_encrypt,%function
.align 5
aes_hw_encrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
.Loop_enc:
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q1},[r2]!
bgt .Loop_enc
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.size aes_hw_encrypt,.-aes_hw_encrypt
.globl aes_hw_decrypt
.hidden aes_hw_decrypt
.type aes_hw_decrypt,%function
.align 5
aes_hw_decrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
.Loop_dec:
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q1},[r2]!
bgt .Loop_dec
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.size aes_hw_decrypt,.-aes_hw_decrypt
.globl aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type aes_hw_cbc_encrypt,%function
.align 5
aes_hw_cbc_encrypt:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load remaining args
subs r2,r2,#16
mov r8,#16
blo .Lcbc_abort
moveq r8,#0
cmp r5,#0 @ en- or decrypting?
ldr r5,[r3,#240]
and r2,r2,#-16
vld1.8 {q6},[r4]
vld1.8 {q0},[r0],r8
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#6
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
sub r5,r5,#2
vld1.32 {q10,q11},[r7]!
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
beq .Lcbc_dec
cmp r5,#2
veor q0,q0,q6
veor q5,q8,q7
beq .Lcbc_enc128
vld1.32 {q2,q3},[r7]
add r7,r3,#16
add r6,r3,#16*4
add r12,r3,#16*5
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
add r14,r3,#16*6
add r3,r3,#16*7
b .Lenter_cbc_enc
.align 4
.Loop_cbc_enc:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
.Lenter_cbc_enc:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r6]
cmp r5,#4
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r12]
beq .Lcbc_enc192
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r14]
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r3]
nop
.Lcbc_enc192:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs .Loop_cbc_enc
vst1.8 {q6},[r1]!
b .Lcbc_done
.align 5
.Lcbc_enc128:
vld1.32 {q2,q3},[r7]
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
.Lenter_cbc_enc128:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs .Loop_cbc_enc128
vst1.8 {q6},[r1]!
b .Lcbc_done
.align 5
.Lcbc_dec:
vld1.8 {q10},[r0]!
subs r2,r2,#32 @ bias
add r6,r5,#2
vorr q3,q0,q0
vorr q1,q0,q0
vorr q11,q10,q10
blo .Lcbc_dec_tail
vorr q1,q10,q10
vld1.8 {q10},[r0]!
vorr q2,q0,q0
vorr q3,q1,q1
vorr q11,q10,q10
.Loop3x_cbc_dec:
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt .Loop3x_cbc_dec
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q4,q6,q7
subs r2,r2,#0x30
veor q5,q2,q7
movlo r6,r2 @ r6, r6, is zero at this point
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
add r0,r0,r6 @ r0 is adjusted in such way that
@ at exit from the loop q1-q10
@ are loaded with last "words"
vorr q6,q11,q11
mov r7,r3
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q2},[r0]!
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q3},[r0]!
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q11},[r0]!
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
add r6,r5,#2
veor q4,q4,q0
veor q5,q5,q1
veor q10,q10,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q4},[r1]!
vorr q0,q2,q2
vst1.8 {q5},[r1]!
vorr q1,q3,q3
vst1.8 {q10},[r1]!
vorr q10,q11,q11
bhs .Loop3x_cbc_dec
cmn r2,#0x30
beq .Lcbc_done
nop
.Lcbc_dec_tail:
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt .Lcbc_dec_tail
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
cmn r2,#0x20
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q5,q6,q7
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
beq .Lcbc_dec_one
veor q5,q5,q1
veor q9,q9,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
vst1.8 {q9},[r1]!
b .Lcbc_done
.Lcbc_dec_one:
veor q5,q5,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
.Lcbc_done:
vst1.8 {q6},[r4]
.Lcbc_abort:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,pc}
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
.globl aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
aes_hw_ctr32_encrypt_blocks:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldr r4, [ip] @ load remaining arg
ldr r5,[r3,#240]
ldr r8, [r4, #12]
vld1.32 {q0},[r4]
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#4
mov r12,#16
cmp r2,#2
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
sub r5,r5,#2
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
movlo r12,#0
#ifndef __ARMEB__
rev r8, r8
#endif
vorr q1,q0,q0
add r10, r8, #1
vorr q10,q0,q0
add r8, r8, #2
vorr q6,q0,q0
rev r10, r10
vmov.32 d3[1],r10
bls .Lctr32_tail
rev r12, r8
sub r2,r2,#3 @ bias
vmov.32 d21[1],r12
b .Loop3x_ctr32
.align 4
.Loop3x_ctr32:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q9},[r7]!
bgt .Loop3x_ctr32
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
vld1.8 {q2},[r0]!
vorr q0,q6,q6
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.8 {q3},[r0]!
vorr q1,q6,q6
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vld1.8 {q11},[r0]!
mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
vorr q10,q6,q6
add r9,r8,#1
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q2,q2,q7
add r10,r8,#2
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
veor q3,q3,q7
add r8,r8,#3
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q11,q11,q7
rev r9,r9
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d1[1], r9
rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vmov.32 d3[1], r10
rev r12,r8
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d21[1], r12
subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
veor q2,q2,q4
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
vst1.8 {q2},[r1]!
veor q3,q3,q5
mov r6,r5
vst1.8 {q3},[r1]!
veor q11,q11,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q11},[r1]!
bhs .Loop3x_ctr32
adds r2,r2,#3
beq .Lctr32_done
cmp r2,#1
mov r12,#16
moveq r12,#0
.Lctr32_tail:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q9},[r7]!
bgt .Lctr32_tail
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q2},[r0],r12
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q3},[r0]
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q2,q2,q7
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q3,q3,q7
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
cmp r2,#1
veor q2,q2,q0
veor q3,q3,q1
vst1.8 {q2},[r1]!
beq .Lctr32_done
vst1.8 {q3},[r1]
.Lctr32_done:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__arm__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,779 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__aarch64__) && defined(__APPLE__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.section __TEXT,__const
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
cmp x0,#0
b.eq Lenc_key_abort
cmp x2,#0
b.eq Lenc_key_abort
mov x3,#-2
cmp w1,#128
b.lt Lenc_key_abort
cmp w1,#256
b.gt Lenc_key_abort
tst w1,#0x3f
b.ne Lenc_key_abort
adrp x3,Lrcon@PAGE
add x3,x3,Lrcon@PAGEOFF
cmp w1,#192
eor v0.16b,v0.16b,v0.16b
ld1 {v3.16b},[x0],#16
mov w1,#8 // reuse w1
ld1 {v1.4s,v2.4s},[x3],#32
b.lt Loop128
b.eq L192
b L256
.align 4
Loop128:
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
b.ne Loop128
ld1 {v1.4s},[x3]
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2]
add x2,x2,#0x50
mov w12,#10
b Ldone
.align 4
L192:
ld1 {v4.8b},[x0],#8
movi v6.16b,#8 // borrow v6.16b
st1 {v3.4s},[x2],#16
sub v2.16b,v2.16b,v6.16b // adjust the mask
Loop192:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.8b},[x2],#8
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
dup v5.4s,v3.s[3]
eor v5.16b,v5.16b,v4.16b
eor v6.16b,v6.16b,v1.16b
ext v4.16b,v0.16b,v4.16b,#12
shl v1.16b,v1.16b,#1
eor v4.16b,v4.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
eor v4.16b,v4.16b,v6.16b
st1 {v3.4s},[x2],#16
b.ne Loop192
mov w12,#12
add x2,x2,#0x20
b Ldone
.align 4
L256:
ld1 {v4.16b},[x0]
mov w1,#7
mov w12,#14
st1 {v3.4s},[x2],#16
Loop256:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2],#16
b.eq Ldone
dup v6.4s,v3.s[3] // just splat
ext v5.16b,v0.16b,v4.16b,#12
aese v6.16b,v0.16b
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
eor v4.16b,v4.16b,v6.16b
b Loop256
Ldone:
str w12,[x2]
mov x3,#0
Lenc_key_abort:
mov x0,x3 // return value
ldr x29,[sp],#16
ret
.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key
.align 5
_aes_hw_set_decrypt_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl Lenc_key
cmp x0,#0
b.ne Ldec_key_abort
sub x2,x2,#240 // restore original x2
mov x4,#-16
add x0,x2,x12,lsl#4 // end of key schedule
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
Loop_imc:
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
aesimc v0.16b,v0.16b
aesimc v1.16b,v1.16b
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
cmp x0,x2
b.hi Loop_imc
ld1 {v0.4s},[x2]
aesimc v0.16b,v0.16b
st1 {v0.4s},[x0]
eor x0,x0,x0 // return value
Ldec_key_abort:
ldp x29,x30,[sp],#16
ret
.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt
.align 5
_aes_hw_encrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
Loop_enc:
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aese v2.16b,v1.16b
aesmc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt Loop_enc
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aese v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.globl _aes_hw_decrypt
.private_extern _aes_hw_decrypt
.align 5
_aes_hw_decrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
Loop_dec:
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aesd v2.16b,v1.16b
aesimc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt Loop_dec
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aesd v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt
.align 5
_aes_hw_cbc_encrypt:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
mov x8,#16
b.lo Lcbc_abort
csel x8,xzr,x8,eq
cmp w5,#0 // en- or decrypting?
ldr w5,[x3,#240]
and x2,x2,#-16
ld1 {v6.16b},[x4]
ld1 {v0.16b},[x0],x8
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#6
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
sub w5,w5,#2
ld1 {v18.4s,v19.4s},[x7],#32
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
b.eq Lcbc_dec
cmp w5,#2
eor v0.16b,v0.16b,v6.16b
eor v5.16b,v16.16b,v7.16b
b.eq Lcbc_enc128
ld1 {v2.4s,v3.4s},[x7]
add x7,x3,#16
add x6,x3,#16*4
add x12,x3,#16*5
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
add x14,x3,#16*6
add x3,x3,#16*7
b Lenter_cbc_enc
.align 4
Loop_cbc_enc:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
Lenter_cbc_enc:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x6]
cmp w5,#4
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x12]
b.eq Lcbc_enc192
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x14]
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x3]
nop
Lcbc_enc192:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs Loop_cbc_enc
st1 {v6.16b},[x1],#16
b Lcbc_done
.align 5
Lcbc_enc128:
ld1 {v2.4s,v3.4s},[x7]
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
b Lenter_cbc_enc128
Loop_cbc_enc128:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
Lenter_cbc_enc128:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs Loop_cbc_enc128
st1 {v6.16b},[x1],#16
b Lcbc_done
.align 5
Lcbc_dec:
ld1 {v18.16b},[x0],#16
subs x2,x2,#32 // bias
add w6,w5,#2
orr v3.16b,v0.16b,v0.16b
orr v1.16b,v0.16b,v0.16b
orr v19.16b,v18.16b,v18.16b
b.lo Lcbc_dec_tail
orr v1.16b,v18.16b,v18.16b
ld1 {v18.16b},[x0],#16
orr v2.16b,v0.16b,v0.16b
orr v3.16b,v1.16b,v1.16b
orr v19.16b,v18.16b,v18.16b
Loop3x_cbc_dec:
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Loop3x_cbc_dec
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
eor v4.16b,v6.16b,v7.16b
subs x2,x2,#0x30
eor v5.16b,v2.16b,v7.16b
csel x6,x2,x6,lo // x6, w6, is zero at this point
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
add x0,x0,x6 // x0 is adjusted in such way that
// at exit from the loop v1.16b-v18.16b
// are loaded with last "words"
orr v6.16b,v19.16b,v19.16b
mov x7,x3
aesd v0.16b,v20.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
ld1 {v2.16b},[x0],#16
aesd v0.16b,v21.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
aesd v0.16b,v22.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
ld1 {v19.16b},[x0],#16
aesd v0.16b,v23.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
add w6,w5,#2
eor v4.16b,v4.16b,v0.16b
eor v5.16b,v5.16b,v1.16b
eor v18.16b,v18.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v4.16b},[x1],#16
orr v0.16b,v2.16b,v2.16b
st1 {v5.16b},[x1],#16
orr v1.16b,v3.16b,v3.16b
st1 {v18.16b},[x1],#16
orr v18.16b,v19.16b,v19.16b
b.hs Loop3x_cbc_dec
cmn x2,#0x30
b.eq Lcbc_done
nop
Lcbc_dec_tail:
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Lcbc_dec_tail
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
cmn x2,#0x20
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
eor v5.16b,v6.16b,v7.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
b.eq Lcbc_dec_one
eor v5.16b,v5.16b,v1.16b
eor v17.16b,v17.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
st1 {v17.16b},[x1],#16
b Lcbc_done
Lcbc_dec_one:
eor v5.16b,v5.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
Lcbc_done:
st1 {v6.16b},[x4]
Lcbc_abort:
ldr x29,[sp],#16
ret
.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks
.align 5
_aes_hw_ctr32_encrypt_blocks:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]
ldr w8, [x4, #12]
ld1 {v0.4s},[x4]
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#4
mov x12,#16
cmp x2,#2
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
sub w5,w5,#2
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
csel x12,xzr,x12,lo
#ifndef __ARMEB__
rev w8, w8
#endif
orr v1.16b,v0.16b,v0.16b
add w10, w8, #1
orr v18.16b,v0.16b,v0.16b
add w8, w8, #2
orr v6.16b,v0.16b,v0.16b
rev w10, w10
mov v1.s[3],w10
b.ls Lctr32_tail
rev w12, w8
sub x2,x2,#3 // bias
mov v18.s[3],w12
b Loop3x_ctr32
.align 4
Loop3x_ctr32:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
aese v18.16b,v17.16b
aesmc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Loop3x_ctr32
aese v0.16b,v16.16b
aesmc v4.16b,v0.16b
aese v1.16b,v16.16b
aesmc v5.16b,v1.16b
ld1 {v2.16b},[x0],#16
orr v0.16b,v6.16b,v6.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
orr v1.16b,v6.16b,v6.16b
aese v4.16b,v17.16b
aesmc v4.16b,v4.16b
aese v5.16b,v17.16b
aesmc v5.16b,v5.16b
ld1 {v19.16b},[x0],#16
mov x7,x3
aese v18.16b,v17.16b
aesmc v17.16b,v18.16b
orr v18.16b,v6.16b,v6.16b
add w9,w8,#1
aese v4.16b,v20.16b
aesmc v4.16b,v4.16b
aese v5.16b,v20.16b
aesmc v5.16b,v5.16b
eor v2.16b,v2.16b,v7.16b
add w10,w8,#2
aese v17.16b,v20.16b
aesmc v17.16b,v17.16b
eor v3.16b,v3.16b,v7.16b
add w8,w8,#3
aese v4.16b,v21.16b
aesmc v4.16b,v4.16b
aese v5.16b,v21.16b
aesmc v5.16b,v5.16b
eor v19.16b,v19.16b,v7.16b
rev w9,w9
aese v17.16b,v21.16b
aesmc v17.16b,v17.16b
mov v0.s[3], w9
rev w10,w10
aese v4.16b,v22.16b
aesmc v4.16b,v4.16b
aese v5.16b,v22.16b
aesmc v5.16b,v5.16b
mov v1.s[3], w10
rev w12,w8
aese v17.16b,v22.16b
aesmc v17.16b,v17.16b
mov v18.s[3], w12
subs x2,x2,#3
aese v4.16b,v23.16b
aese v5.16b,v23.16b
aese v17.16b,v23.16b
eor v2.16b,v2.16b,v4.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
st1 {v2.16b},[x1],#16
eor v3.16b,v3.16b,v5.16b
mov w6,w5
st1 {v3.16b},[x1],#16
eor v19.16b,v19.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v19.16b},[x1],#16
b.hs Loop3x_ctr32
adds x2,x2,#3
b.eq Lctr32_done
cmp x2,#1
mov x12,#16
csel x12,xzr,x12,eq
Lctr32_tail:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v17.4s},[x7],#16
b.gt Lctr32_tail
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v2.16b},[x0],x12
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v1.16b,v20.16b
aesmc v1.16b,v1.16b
ld1 {v3.16b},[x0]
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v1.16b,v21.16b
aesmc v1.16b,v1.16b
eor v2.16b,v2.16b,v7.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v1.16b,v22.16b
aesmc v1.16b,v1.16b
eor v3.16b,v3.16b,v7.16b
aese v0.16b,v23.16b
aese v1.16b,v23.16b
cmp x2,#1
eor v2.16b,v2.16b,v0.16b
eor v3.16b,v3.16b,v1.16b
st1 {v2.16b},[x1],#16
b.eq Lctr32_done
st1 {v3.16b},[x1]
Lctr32_done:
ldr x29,[sp],#16
ret
#endif
#endif // !OPENSSL_NO_ASM
#endif // defined(__aarch64__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,782 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__aarch64__) && defined(__linux__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.arch armv8-a+crypto
.section .rodata
.align 5
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function
.align 5
aes_hw_set_encrypt_key:
.Lenc_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
cmp x0,#0
b.eq .Lenc_key_abort
cmp x2,#0
b.eq .Lenc_key_abort
mov x3,#-2
cmp w1,#128
b.lt .Lenc_key_abort
cmp w1,#256
b.gt .Lenc_key_abort
tst w1,#0x3f
b.ne .Lenc_key_abort
adrp x3,.Lrcon
add x3,x3,:lo12:.Lrcon
cmp w1,#192
eor v0.16b,v0.16b,v0.16b
ld1 {v3.16b},[x0],#16
mov w1,#8 // reuse w1
ld1 {v1.4s,v2.4s},[x3],#32
b.lt .Loop128
b.eq .L192
b .L256
.align 4
.Loop128:
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
b.ne .Loop128
ld1 {v1.4s},[x3]
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2]
add x2,x2,#0x50
mov w12,#10
b .Ldone
.align 4
.L192:
ld1 {v4.8b},[x0],#8
movi v6.16b,#8 // borrow v6.16b
st1 {v3.4s},[x2],#16
sub v2.16b,v2.16b,v6.16b // adjust the mask
.Loop192:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.8b},[x2],#8
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
dup v5.4s,v3.s[3]
eor v5.16b,v5.16b,v4.16b
eor v6.16b,v6.16b,v1.16b
ext v4.16b,v0.16b,v4.16b,#12
shl v1.16b,v1.16b,#1
eor v4.16b,v4.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
eor v4.16b,v4.16b,v6.16b
st1 {v3.4s},[x2],#16
b.ne .Loop192
mov w12,#12
add x2,x2,#0x20
b .Ldone
.align 4
.L256:
ld1 {v4.16b},[x0]
mov w1,#7
mov w12,#14
st1 {v3.4s},[x2],#16
.Loop256:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2],#16
b.eq .Ldone
dup v6.4s,v3.s[3] // just splat
ext v5.16b,v0.16b,v4.16b,#12
aese v6.16b,v0.16b
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
eor v4.16b,v4.16b,v6.16b
b .Loop256
.Ldone:
str w12,[x2]
mov x3,#0
.Lenc_key_abort:
mov x0,x3 // return value
ldr x29,[sp],#16
ret
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.globl aes_hw_set_decrypt_key
.hidden aes_hw_set_decrypt_key
.type aes_hw_set_decrypt_key,%function
.align 5
aes_hw_set_decrypt_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl .Lenc_key
cmp x0,#0
b.ne .Ldec_key_abort
sub x2,x2,#240 // restore original x2
mov x4,#-16
add x0,x2,x12,lsl#4 // end of key schedule
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
.Loop_imc:
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
aesimc v0.16b,v0.16b
aesimc v1.16b,v1.16b
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
cmp x0,x2
b.hi .Loop_imc
ld1 {v0.4s},[x2]
aesimc v0.16b,v0.16b
st1 {v0.4s},[x0]
eor x0,x0,x0 // return value
.Ldec_key_abort:
ldp x29,x30,[sp],#16
ret
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_hw_encrypt
.hidden aes_hw_encrypt
.type aes_hw_encrypt,%function
.align 5
aes_hw_encrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
.Loop_enc:
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aese v2.16b,v1.16b
aesmc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt .Loop_enc
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aese v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.size aes_hw_encrypt,.-aes_hw_encrypt
.globl aes_hw_decrypt
.hidden aes_hw_decrypt
.type aes_hw_decrypt,%function
.align 5
aes_hw_decrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
.Loop_dec:
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aesd v2.16b,v1.16b
aesimc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt .Loop_dec
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aesd v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.size aes_hw_decrypt,.-aes_hw_decrypt
.globl aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type aes_hw_cbc_encrypt,%function
.align 5
aes_hw_cbc_encrypt:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
mov x8,#16
b.lo .Lcbc_abort
csel x8,xzr,x8,eq
cmp w5,#0 // en- or decrypting?
ldr w5,[x3,#240]
and x2,x2,#-16
ld1 {v6.16b},[x4]
ld1 {v0.16b},[x0],x8
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#6
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
sub w5,w5,#2
ld1 {v18.4s,v19.4s},[x7],#32
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
b.eq .Lcbc_dec
cmp w5,#2
eor v0.16b,v0.16b,v6.16b
eor v5.16b,v16.16b,v7.16b
b.eq .Lcbc_enc128
ld1 {v2.4s,v3.4s},[x7]
add x7,x3,#16
add x6,x3,#16*4
add x12,x3,#16*5
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
add x14,x3,#16*6
add x3,x3,#16*7
b .Lenter_cbc_enc
.align 4
.Loop_cbc_enc:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
.Lenter_cbc_enc:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x6]
cmp w5,#4
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x12]
b.eq .Lcbc_enc192
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x14]
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x3]
nop
.Lcbc_enc192:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs .Loop_cbc_enc
st1 {v6.16b},[x1],#16
b .Lcbc_done
.align 5
.Lcbc_enc128:
ld1 {v2.4s,v3.4s},[x7]
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
.Lenter_cbc_enc128:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs .Loop_cbc_enc128
st1 {v6.16b},[x1],#16
b .Lcbc_done
.align 5
.Lcbc_dec:
ld1 {v18.16b},[x0],#16
subs x2,x2,#32 // bias
add w6,w5,#2
orr v3.16b,v0.16b,v0.16b
orr v1.16b,v0.16b,v0.16b
orr v19.16b,v18.16b,v18.16b
b.lo .Lcbc_dec_tail
orr v1.16b,v18.16b,v18.16b
ld1 {v18.16b},[x0],#16
orr v2.16b,v0.16b,v0.16b
orr v3.16b,v1.16b,v1.16b
orr v19.16b,v18.16b,v18.16b
.Loop3x_cbc_dec:
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Loop3x_cbc_dec
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
eor v4.16b,v6.16b,v7.16b
subs x2,x2,#0x30
eor v5.16b,v2.16b,v7.16b
csel x6,x2,x6,lo // x6, w6, is zero at this point
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
add x0,x0,x6 // x0 is adjusted in such way that
// at exit from the loop v1.16b-v18.16b
// are loaded with last "words"
orr v6.16b,v19.16b,v19.16b
mov x7,x3
aesd v0.16b,v20.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
ld1 {v2.16b},[x0],#16
aesd v0.16b,v21.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
aesd v0.16b,v22.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
ld1 {v19.16b},[x0],#16
aesd v0.16b,v23.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
add w6,w5,#2
eor v4.16b,v4.16b,v0.16b
eor v5.16b,v5.16b,v1.16b
eor v18.16b,v18.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v4.16b},[x1],#16
orr v0.16b,v2.16b,v2.16b
st1 {v5.16b},[x1],#16
orr v1.16b,v3.16b,v3.16b
st1 {v18.16b},[x1],#16
orr v18.16b,v19.16b,v19.16b
b.hs .Loop3x_cbc_dec
cmn x2,#0x30
b.eq .Lcbc_done
nop
.Lcbc_dec_tail:
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Lcbc_dec_tail
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
cmn x2,#0x20
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
eor v5.16b,v6.16b,v7.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
b.eq .Lcbc_dec_one
eor v5.16b,v5.16b,v1.16b
eor v17.16b,v17.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
st1 {v17.16b},[x1],#16
b .Lcbc_done
.Lcbc_dec_one:
eor v5.16b,v5.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
.Lcbc_done:
st1 {v6.16b},[x4]
.Lcbc_abort:
ldr x29,[sp],#16
ret
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
.globl aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
aes_hw_ctr32_encrypt_blocks:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]
ldr w8, [x4, #12]
ld1 {v0.4s},[x4]
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#4
mov x12,#16
cmp x2,#2
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
sub w5,w5,#2
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
csel x12,xzr,x12,lo
#ifndef __ARMEB__
rev w8, w8
#endif
orr v1.16b,v0.16b,v0.16b
add w10, w8, #1
orr v18.16b,v0.16b,v0.16b
add w8, w8, #2
orr v6.16b,v0.16b,v0.16b
rev w10, w10
mov v1.s[3],w10
b.ls .Lctr32_tail
rev w12, w8
sub x2,x2,#3 // bias
mov v18.s[3],w12
b .Loop3x_ctr32
.align 4
.Loop3x_ctr32:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
aese v18.16b,v17.16b
aesmc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Loop3x_ctr32
aese v0.16b,v16.16b
aesmc v4.16b,v0.16b
aese v1.16b,v16.16b
aesmc v5.16b,v1.16b
ld1 {v2.16b},[x0],#16
orr v0.16b,v6.16b,v6.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
orr v1.16b,v6.16b,v6.16b
aese v4.16b,v17.16b
aesmc v4.16b,v4.16b
aese v5.16b,v17.16b
aesmc v5.16b,v5.16b
ld1 {v19.16b},[x0],#16
mov x7,x3
aese v18.16b,v17.16b
aesmc v17.16b,v18.16b
orr v18.16b,v6.16b,v6.16b
add w9,w8,#1
aese v4.16b,v20.16b
aesmc v4.16b,v4.16b
aese v5.16b,v20.16b
aesmc v5.16b,v5.16b
eor v2.16b,v2.16b,v7.16b
add w10,w8,#2
aese v17.16b,v20.16b
aesmc v17.16b,v17.16b
eor v3.16b,v3.16b,v7.16b
add w8,w8,#3
aese v4.16b,v21.16b
aesmc v4.16b,v4.16b
aese v5.16b,v21.16b
aesmc v5.16b,v5.16b
eor v19.16b,v19.16b,v7.16b
rev w9,w9
aese v17.16b,v21.16b
aesmc v17.16b,v17.16b
mov v0.s[3], w9
rev w10,w10
aese v4.16b,v22.16b
aesmc v4.16b,v4.16b
aese v5.16b,v22.16b
aesmc v5.16b,v5.16b
mov v1.s[3], w10
rev w12,w8
aese v17.16b,v22.16b
aesmc v17.16b,v17.16b
mov v18.s[3], w12
subs x2,x2,#3
aese v4.16b,v23.16b
aese v5.16b,v23.16b
aese v17.16b,v23.16b
eor v2.16b,v2.16b,v4.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
st1 {v2.16b},[x1],#16
eor v3.16b,v3.16b,v5.16b
mov w6,w5
st1 {v3.16b},[x1],#16
eor v19.16b,v19.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v19.16b},[x1],#16
b.hs .Loop3x_ctr32
adds x2,x2,#3
b.eq .Lctr32_done
cmp x2,#1
mov x12,#16
csel x12,xzr,x12,eq
.Lctr32_tail:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v17.4s},[x7],#16
b.gt .Lctr32_tail
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v2.16b},[x0],x12
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v1.16b,v20.16b
aesmc v1.16b,v1.16b
ld1 {v3.16b},[x0]
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v1.16b,v21.16b
aesmc v1.16b,v1.16b
eor v2.16b,v2.16b,v7.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v1.16b,v22.16b
aesmc v1.16b,v1.16b
eor v3.16b,v3.16b,v7.16b
aese v0.16b,v23.16b
aese v1.16b,v23.16b
cmp x2,#1
eor v2.16b,v2.16b,v0.16b
eor v3.16b,v3.16b,v1.16b
st1 {v2.16b},[x1],#16
b.eq .Lctr32_done
st1 {v3.16b},[x1]
.Lctr32_done:
ldr x29,[sp],#16
ret
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__aarch64__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,989 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__APPLE__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.align 5
LOPENSSL_armcap:
.word OPENSSL_armcap_P-Lbn_mul_mont
#endif
.globl _bn_mul_mont
.private_extern _bn_mul_mont
#ifdef __thumb2__
.thumb_func _bn_mul_mont
#endif
.align 5
_bn_mul_mont:
Lbn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne Lialu
adr r0,Lbn_mul_mont
ldr r2,LOPENSSL_armcap
ldr r0,[r0,r2]
#ifdef __APPLE__
ldr r0,[r0]
#endif
tst r0,#ARMV7_NEON @ NEON available?
ldmia sp, {r0,r2}
beq Lialu
add sp,sp,#8
b bn_mul8x_mont_neon
.align 4
Lialu:
#endif
cmp ip,#2
mov r0,ip @ load num
#ifdef __thumb2__
ittt lt
#endif
movlt r0,#0
addlt sp,sp,#2*4
blt Labrt
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
mov r0,r0,lsl#2 @ rescale r0 for byte count
sub sp,sp,r0 @ alloca(4*num)
sub sp,sp,#4 @ +extra dword
sub r0,r0,#4 @ "num=num-1"
add r4,r2,r0 @ &bp[num-1]
add r0,sp,r0 @ r0 to point at &tp[num-1]
ldr r8,[r0,#14*4] @ &n0
ldr r2,[r2] @ bp[0]
ldr r5,[r1],#4 @ ap[0],ap++
ldr r6,[r3],#4 @ np[0],np++
ldr r8,[r8] @ *n0
str r4,[r0,#15*4] @ save &bp[num]
umull r10,r11,r5,r2 @ ap[0]*bp[0]
str r8,[r0,#14*4] @ save n0 value
mul r8,r10,r8 @ "tp[0]"*n0
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
mov r4,sp
L1st:
ldr r5,[r1],#4 @ ap[j],ap++
mov r10,r11
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne L1st
adds r12,r12,r11
ldr r4,[r0,#13*4] @ restore bp
mov r14,#0
ldr r8,[r0,#14*4] @ restore n0
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
mov r7,sp
str r14,[r0,#4] @ tp[num]=
Louter:
sub r7,r0,r7 @ "original" r0-1 value
sub r1,r1,r7 @ "rewind" ap to &ap[1]
ldr r2,[r4,#4]! @ *(++bp)
sub r3,r3,r7 @ "rewind" np to &np[1]
ldr r5,[r1,#-4] @ ap[0]
ldr r10,[sp] @ tp[0]
ldr r6,[r3,#-4] @ np[0]
ldr r7,[sp,#4] @ tp[1]
mov r11,#0
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
str r4,[r0,#13*4] @ save bp
mul r8,r10,r8
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
mov r4,sp
Linner:
ldr r5,[r1],#4 @ ap[j],ap++
adds r10,r11,r7 @ +=tp[j]
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adc r11,r11,#0
ldr r7,[r4,#8] @ tp[j+1]
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne Linner
adds r12,r12,r11
mov r14,#0
ldr r4,[r0,#13*4] @ restore bp
adc r14,r14,#0
ldr r8,[r0,#14*4] @ restore n0
adds r12,r12,r7
ldr r7,[r0,#15*4] @ restore &bp[num]
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
str r14,[r0,#4] @ tp[num]=
cmp r4,r7
#ifdef __thumb2__
itt ne
#endif
movne r7,sp
bne Louter
ldr r2,[r0,#12*4] @ pull rp
mov r5,sp
add r0,r0,#4 @ r0 to point at &tp[num]
sub r5,r0,r5 @ "original" num value
mov r4,sp @ "rewind" r4
mov r1,r4 @ "borrow" r1
sub r3,r3,r5 @ "rewind" r3 to &np[0]
subs r7,r7,r7 @ "clear" carry flag
Lsub: ldr r7,[r4],#4
ldr r6,[r3],#4
sbcs r7,r7,r6 @ tp[j]-np[j]
str r7,[r2],#4 @ rp[j]=
teq r4,r0 @ preserve carry
bne Lsub
sbcs r14,r14,#0 @ upmost carry
mov r4,sp @ "rewind" r4
sub r2,r2,r5 @ "rewind" r2
Lcopy: ldr r7,[r4] @ conditional copy
ldr r5,[r2]
str sp,[r4],#4 @ zap tp
#ifdef __thumb2__
it cc
#endif
movcc r5,r7
str r5,[r2],#4
teq r4,r0 @ preserve carry
bne Lcopy
mov sp,r0
add sp,sp,#4 @ skip over tp[num+1]
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
Labrt:
#if __ARM_ARCH__>=5
bx lr @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
#if __ARM_MAX_ARCH__>=7
#ifdef __thumb2__
.thumb_func bn_mul8x_mont_neon
#endif
.align 5
bn_mul8x_mont_neon:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load rest of parameter block
mov ip,sp
cmp r5,#8
bhi LNEON_8n
@ special case for r5==8, everything is in register bank...
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
sub r7,sp,r5,lsl#4
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
and r7,r7,#-64
vld1.32 {d30[0]}, [r4,:32]
mov sp,r7 @ alloca
vzip.16 d28,d8
vmull.u32 q6,d28,d0[0]
vmull.u32 q7,d28,d0[1]
vmull.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmull.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
vmul.u32 d29,d29,d30
vmull.u32 q10,d28,d2[0]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmull.u32 q11,d28,d2[1]
vmull.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmull.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
sub r9,r5,#1
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
b LNEON_outer8
.align 4
LNEON_outer8:
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
vzip.16 d28,d8
vadd.u64 d12,d12,d10
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
subs r9,r9,#1
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
bne LNEON_outer8
vadd.u64 d12,d12,d10
mov r7,sp
vshr.u64 d10,d12,#16
mov r8,r5
vadd.u64 d13,d13,d10
add r6,sp,#96
vshr.u64 d10,d13,#16
vzip.16 d12,d13
b LNEON_tail_entry
.align 4
LNEON_8n:
veor q6,q6,q6
sub r7,sp,#128
veor q7,q7,q7
sub r7,r7,r5,lsl#4
veor q8,q8,q8
and r7,r7,#-64
veor q9,q9,q9
mov sp,r7 @ alloca
veor q10,q10,q10
add r7,r7,#256
veor q11,q11,q11
sub r8,r5,#8
veor q12,q12,q12
veor q13,q13,q13
LNEON_8n_init:
vst1.64 {q6,q7},[r7,:256]!
subs r8,r8,#8
vst1.64 {q8,q9},[r7,:256]!
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12,q13},[r7,:256]!
bne LNEON_8n_init
add r6,sp,#256
vld1.32 {d0,d1,d2,d3},[r1]!
add r10,sp,#8
vld1.32 {d30[0]},[r4,:32]
mov r9,r5
b LNEON_8n_outer
.align 4
LNEON_8n_outer:
vld1.32 {d28[0]},[r2,:32]! @ *b++
veor d8,d8,d8
vzip.16 d28,d8
add r7,sp,#128
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
vmlal.u32 q10,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q11,d28,d2[1]
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q6,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q7,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q8,d29,d5[0]
vshr.u64 d12,d12,#16
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vadd.u64 d12,d12,d13
vmlal.u32 q11,d29,d6[1]
vshr.u64 d12,d12,#16
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vadd.u64 d14,d14,d12
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]!
vmlal.u32 q8,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q9,d28,d1[0]
vshl.i64 d29,d15,#16
vmlal.u32 q10,d28,d1[1]
vadd.u64 d29,d29,d14
vmlal.u32 q11,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q12,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
vmlal.u32 q13,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q7,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q8,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q9,d29,d5[0]
vshr.u64 d14,d14,#16
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vadd.u64 d14,d14,d15
vmlal.u32 q12,d29,d6[1]
vshr.u64 d14,d14,#16
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vadd.u64 d16,d16,d14
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]!
vmlal.u32 q9,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q10,d28,d1[0]
vshl.i64 d29,d17,#16
vmlal.u32 q11,d28,d1[1]
vadd.u64 d29,d29,d16
vmlal.u32 q12,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q13,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
vmlal.u32 q6,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q8,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q9,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q10,d29,d5[0]
vshr.u64 d16,d16,#16
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vadd.u64 d16,d16,d17
vmlal.u32 q13,d29,d6[1]
vshr.u64 d16,d16,#16
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vadd.u64 d18,d18,d16
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]!
vmlal.u32 q10,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q11,d28,d1[0]
vshl.i64 d29,d19,#16
vmlal.u32 q12,d28,d1[1]
vadd.u64 d29,d29,d18
vmlal.u32 q13,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q6,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
vmlal.u32 q7,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q9,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q10,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q11,d29,d5[0]
vshr.u64 d18,d18,#16
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vadd.u64 d18,d18,d19
vmlal.u32 q6,d29,d6[1]
vshr.u64 d18,d18,#16
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vadd.u64 d20,d20,d18
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]!
vmlal.u32 q11,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q12,d28,d1[0]
vshl.i64 d29,d21,#16
vmlal.u32 q13,d28,d1[1]
vadd.u64 d29,d29,d20
vmlal.u32 q6,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q7,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
vmlal.u32 q8,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q10,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q11,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q12,d29,d5[0]
vshr.u64 d20,d20,#16
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vadd.u64 d20,d20,d21
vmlal.u32 q7,d29,d6[1]
vshr.u64 d20,d20,#16
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vadd.u64 d22,d22,d20
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]!
vmlal.u32 q12,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q13,d28,d1[0]
vshl.i64 d29,d23,#16
vmlal.u32 q6,d28,d1[1]
vadd.u64 d29,d29,d22
vmlal.u32 q7,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q8,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
vmlal.u32 q9,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q11,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q12,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q13,d29,d5[0]
vshr.u64 d22,d22,#16
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vadd.u64 d22,d22,d23
vmlal.u32 q8,d29,d6[1]
vshr.u64 d22,d22,#16
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vadd.u64 d24,d24,d22
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]!
vmlal.u32 q13,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q6,d28,d1[0]
vshl.i64 d29,d25,#16
vmlal.u32 q7,d28,d1[1]
vadd.u64 d29,d29,d24
vmlal.u32 q8,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q9,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
vmlal.u32 q10,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q12,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q13,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q6,d29,d5[0]
vshr.u64 d24,d24,#16
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vadd.u64 d24,d24,d25
vmlal.u32 q9,d29,d6[1]
vshr.u64 d24,d24,#16
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vadd.u64 d26,d26,d24
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]!
vmlal.u32 q6,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q7,d28,d1[0]
vshl.i64 d29,d27,#16
vmlal.u32 q8,d28,d1[1]
vadd.u64 d29,d29,d26
vmlal.u32 q9,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
vmlal.u32 q11,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q12,d28,d3[1]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q13,d29,d4[0]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q6,d29,d4[1]
vmlal.u32 q7,d29,d5[0]
vshr.u64 d26,d26,#16
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vadd.u64 d26,d26,d27
vmlal.u32 q10,d29,d6[1]
vshr.u64 d26,d26,#16
vmlal.u32 q11,d29,d7[0]
vmlal.u32 q12,d29,d7[1]
vadd.u64 d12,d12,d26
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
add r10,sp,#8 @ rewind
sub r8,r5,#8
b LNEON_8n_inner
.align 4
LNEON_8n_inner:
subs r8,r8,#8
vmlal.u32 q6,d28,d0[0]
vld1.64 {q13},[r6,:128]
vmlal.u32 q7,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
vmlal.u32 q8,d28,d1[0]
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q9,d28,d1[1]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmlal.u32 q11,d29,d6[1]
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vst1.64 {q6},[r7,:128]!
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]
vmlal.u32 q8,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
vmlal.u32 q9,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d1[1]
vmlal.u32 q11,d28,d2[0]
vmlal.u32 q12,d28,d2[1]
vmlal.u32 q13,d28,d3[0]
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
vmlal.u32 q7,d29,d4[0]
vmlal.u32 q8,d29,d4[1]
vmlal.u32 q9,d29,d5[0]
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vmlal.u32 q12,d29,d6[1]
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vst1.64 {q7},[r7,:128]!
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]
vmlal.u32 q9,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
vmlal.u32 q10,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q11,d28,d1[1]
vmlal.u32 q12,d28,d2[0]
vmlal.u32 q13,d28,d2[1]
vmlal.u32 q6,d28,d3[0]
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
vmlal.u32 q8,d29,d4[0]
vmlal.u32 q9,d29,d4[1]
vmlal.u32 q10,d29,d5[0]
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vmlal.u32 q13,d29,d6[1]
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vst1.64 {q8},[r7,:128]!
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]
vmlal.u32 q10,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
vmlal.u32 q11,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q12,d28,d1[1]
vmlal.u32 q13,d28,d2[0]
vmlal.u32 q6,d28,d2[1]
vmlal.u32 q7,d28,d3[0]
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
vmlal.u32 q9,d29,d4[0]
vmlal.u32 q10,d29,d4[1]
vmlal.u32 q11,d29,d5[0]
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vmlal.u32 q6,d29,d6[1]
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vst1.64 {q9},[r7,:128]!
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]
vmlal.u32 q11,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
vmlal.u32 q12,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q13,d28,d1[1]
vmlal.u32 q6,d28,d2[0]
vmlal.u32 q7,d28,d2[1]
vmlal.u32 q8,d28,d3[0]
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
vmlal.u32 q10,d29,d4[0]
vmlal.u32 q11,d29,d4[1]
vmlal.u32 q12,d29,d5[0]
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vmlal.u32 q7,d29,d6[1]
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vst1.64 {q10},[r7,:128]!
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]
vmlal.u32 q12,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
vmlal.u32 q13,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q6,d28,d1[1]
vmlal.u32 q7,d28,d2[0]
vmlal.u32 q8,d28,d2[1]
vmlal.u32 q9,d28,d3[0]
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
vmlal.u32 q11,d29,d4[0]
vmlal.u32 q12,d29,d4[1]
vmlal.u32 q13,d29,d5[0]
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vmlal.u32 q8,d29,d6[1]
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vst1.64 {q11},[r7,:128]!
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]
vmlal.u32 q13,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
vmlal.u32 q6,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q7,d28,d1[1]
vmlal.u32 q8,d28,d2[0]
vmlal.u32 q9,d28,d2[1]
vmlal.u32 q10,d28,d3[0]
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
vmlal.u32 q12,d29,d4[0]
vmlal.u32 q13,d29,d4[1]
vmlal.u32 q6,d29,d5[0]
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vmlal.u32 q9,d29,d6[1]
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vst1.64 {q12},[r7,:128]!
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]
vmlal.u32 q6,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
vmlal.u32 q7,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q8,d28,d1[1]
vmlal.u32 q9,d28,d2[0]
vmlal.u32 q10,d28,d2[1]
vmlal.u32 q11,d28,d3[0]
vmlal.u32 q12,d28,d3[1]
it eq
subeq r1,r1,r5,lsl#2 @ rewind
vmlal.u32 q13,d29,d4[0]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q6,d29,d4[1]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q7,d29,d5[0]
add r10,sp,#8 @ rewind
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vmlal.u32 q10,d29,d6[1]
vmlal.u32 q11,d29,d7[0]
vst1.64 {q13},[r7,:128]!
vmlal.u32 q12,d29,d7[1]
bne LNEON_8n_inner
add r6,sp,#128
vst1.64 {q6,q7},[r7,:256]!
veor q2,q2,q2 @ d4-d5
vst1.64 {q8,q9},[r7,:256]!
veor q3,q3,q3 @ d6-d7
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12},[r7,:128]
subs r9,r9,#8
vld1.64 {q6,q7},[r6,:256]!
vld1.64 {q8,q9},[r6,:256]!
vld1.64 {q10,q11},[r6,:256]!
vld1.64 {q12,q13},[r6,:256]!
itt ne
subne r3,r3,r5,lsl#2 @ rewind
bne LNEON_8n_outer
add r7,sp,#128
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
vshr.u64 d10,d12,#16
vst1.64 {q2,q3},[sp,:256]!
vadd.u64 d13,d13,d10
vst1.64 {q2,q3}, [sp,:256]!
vshr.u64 d10,d13,#16
vst1.64 {q2,q3}, [sp,:256]!
vzip.16 d12,d13
mov r8,r5
b LNEON_tail_entry
.align 4
LNEON_tail:
vadd.u64 d12,d12,d10
vshr.u64 d10,d12,#16
vld1.64 {q8,q9}, [r6, :256]!
vadd.u64 d13,d13,d10
vld1.64 {q10,q11}, [r6, :256]!
vshr.u64 d10,d13,#16
vld1.64 {q12,q13}, [r6, :256]!
vzip.16 d12,d13
LNEON_tail_entry:
vadd.u64 d14,d14,d10
vst1.32 {d12[0]}, [r7, :32]!
vshr.u64 d10,d14,#16
vadd.u64 d15,d15,d10
vshr.u64 d10,d15,#16
vzip.16 d14,d15
vadd.u64 d16,d16,d10
vst1.32 {d14[0]}, [r7, :32]!
vshr.u64 d10,d16,#16
vadd.u64 d17,d17,d10
vshr.u64 d10,d17,#16
vzip.16 d16,d17
vadd.u64 d18,d18,d10
vst1.32 {d16[0]}, [r7, :32]!
vshr.u64 d10,d18,#16
vadd.u64 d19,d19,d10
vshr.u64 d10,d19,#16
vzip.16 d18,d19
vadd.u64 d20,d20,d10
vst1.32 {d18[0]}, [r7, :32]!
vshr.u64 d10,d20,#16
vadd.u64 d21,d21,d10
vshr.u64 d10,d21,#16
vzip.16 d20,d21
vadd.u64 d22,d22,d10
vst1.32 {d20[0]}, [r7, :32]!
vshr.u64 d10,d22,#16
vadd.u64 d23,d23,d10
vshr.u64 d10,d23,#16
vzip.16 d22,d23
vadd.u64 d24,d24,d10
vst1.32 {d22[0]}, [r7, :32]!
vshr.u64 d10,d24,#16
vadd.u64 d25,d25,d10
vshr.u64 d10,d25,#16
vzip.16 d24,d25
vadd.u64 d26,d26,d10
vst1.32 {d24[0]}, [r7, :32]!
vshr.u64 d10,d26,#16
vadd.u64 d27,d27,d10
vshr.u64 d10,d27,#16
vzip.16 d26,d27
vld1.64 {q6,q7}, [r6, :256]!
subs r8,r8,#8
vst1.32 {d26[0]}, [r7, :32]!
bne LNEON_tail
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
sub r3,r3,r5,lsl#2 @ rewind r3
subs r1,sp,#0 @ clear carry flag
add r2,sp,r5,lsl#2
LNEON_sub:
ldmia r1!, {r4,r5,r6,r7}
ldmia r3!, {r8,r9,r10,r11}
sbcs r8, r4,r8
sbcs r9, r5,r9
sbcs r10,r6,r10
sbcs r11,r7,r11
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne LNEON_sub
ldr r10, [r1] @ load top-most bit
mov r11,sp
veor q0,q0,q0
sub r11,r2,r11 @ this is num*4
veor q1,q1,q1
mov r1,sp
sub r0,r0,r11 @ rewind r0
mov r3,r2 @ second 3/4th of frame
sbcs r10,r10,#0 @ result is carry flag
LNEON_copy_n_zap:
ldmia r1!, {r4,r5,r6,r7}
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r3,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
ldmia r1, {r4,r5,r6,r7}
stmia r0!, {r8,r9,r10,r11}
sub r1,r1,#16
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r1,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne LNEON_copy_n_zap
mov sp,ip
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
bx lr @ bx lr
#endif
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7
.comm _OPENSSL_armcap_P,4
.non_lazy_symbol_pointer
OPENSSL_armcap_P:
.indirect_symbol _OPENSSL_armcap_P
.long 0
.private_extern _OPENSSL_armcap_P
#endif
#endif // !OPENSSL_NO_ASM
#endif // defined(__arm__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,984 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__linux__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.arch armv7-a
.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-.Lbn_mul_mont
#endif
.globl bn_mul_mont
.hidden bn_mul_mont
.type bn_mul_mont,%function
.align 5
bn_mul_mont:
.Lbn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne .Lialu
adr r0,.Lbn_mul_mont
ldr r2,.LOPENSSL_armcap
ldr r0,[r0,r2]
#ifdef __APPLE__
ldr r0,[r0]
#endif
tst r0,#ARMV7_NEON @ NEON available?
ldmia sp, {r0,r2}
beq .Lialu
add sp,sp,#8
b bn_mul8x_mont_neon
.align 4
.Lialu:
#endif
cmp ip,#2
mov r0,ip @ load num
#ifdef __thumb2__
ittt lt
#endif
movlt r0,#0
addlt sp,sp,#2*4
blt .Labrt
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
mov r0,r0,lsl#2 @ rescale r0 for byte count
sub sp,sp,r0 @ alloca(4*num)
sub sp,sp,#4 @ +extra dword
sub r0,r0,#4 @ "num=num-1"
add r4,r2,r0 @ &bp[num-1]
add r0,sp,r0 @ r0 to point at &tp[num-1]
ldr r8,[r0,#14*4] @ &n0
ldr r2,[r2] @ bp[0]
ldr r5,[r1],#4 @ ap[0],ap++
ldr r6,[r3],#4 @ np[0],np++
ldr r8,[r8] @ *n0
str r4,[r0,#15*4] @ save &bp[num]
umull r10,r11,r5,r2 @ ap[0]*bp[0]
str r8,[r0,#14*4] @ save n0 value
mul r8,r10,r8 @ "tp[0]"*n0
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
mov r4,sp
.L1st:
ldr r5,[r1],#4 @ ap[j],ap++
mov r10,r11
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne .L1st
adds r12,r12,r11
ldr r4,[r0,#13*4] @ restore bp
mov r14,#0
ldr r8,[r0,#14*4] @ restore n0
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
mov r7,sp
str r14,[r0,#4] @ tp[num]=
.Louter:
sub r7,r0,r7 @ "original" r0-1 value
sub r1,r1,r7 @ "rewind" ap to &ap[1]
ldr r2,[r4,#4]! @ *(++bp)
sub r3,r3,r7 @ "rewind" np to &np[1]
ldr r5,[r1,#-4] @ ap[0]
ldr r10,[sp] @ tp[0]
ldr r6,[r3,#-4] @ np[0]
ldr r7,[sp,#4] @ tp[1]
mov r11,#0
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
str r4,[r0,#13*4] @ save bp
mul r8,r10,r8
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
mov r4,sp
.Linner:
ldr r5,[r1],#4 @ ap[j],ap++
adds r10,r11,r7 @ +=tp[j]
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adc r11,r11,#0
ldr r7,[r4,#8] @ tp[j+1]
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne .Linner
adds r12,r12,r11
mov r14,#0
ldr r4,[r0,#13*4] @ restore bp
adc r14,r14,#0
ldr r8,[r0,#14*4] @ restore n0
adds r12,r12,r7
ldr r7,[r0,#15*4] @ restore &bp[num]
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
str r14,[r0,#4] @ tp[num]=
cmp r4,r7
#ifdef __thumb2__
itt ne
#endif
movne r7,sp
bne .Louter
ldr r2,[r0,#12*4] @ pull rp
mov r5,sp
add r0,r0,#4 @ r0 to point at &tp[num]
sub r5,r0,r5 @ "original" num value
mov r4,sp @ "rewind" r4
mov r1,r4 @ "borrow" r1
sub r3,r3,r5 @ "rewind" r3 to &np[0]
subs r7,r7,r7 @ "clear" carry flag
.Lsub: ldr r7,[r4],#4
ldr r6,[r3],#4
sbcs r7,r7,r6 @ tp[j]-np[j]
str r7,[r2],#4 @ rp[j]=
teq r4,r0 @ preserve carry
bne .Lsub
sbcs r14,r14,#0 @ upmost carry
mov r4,sp @ "rewind" r4
sub r2,r2,r5 @ "rewind" r2
.Lcopy: ldr r7,[r4] @ conditional copy
ldr r5,[r2]
str sp,[r4],#4 @ zap tp
#ifdef __thumb2__
it cc
#endif
movcc r5,r7
str r5,[r2],#4
teq r4,r0 @ preserve carry
bne .Lcopy
mov sp,r0
add sp,sp,#4 @ skip over tp[num+1]
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
.Labrt:
#if __ARM_ARCH__>=5
bx lr @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size bn_mul_mont,.-bn_mul_mont
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.type bn_mul8x_mont_neon,%function
.align 5
bn_mul8x_mont_neon:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load rest of parameter block
mov ip,sp
cmp r5,#8
bhi .LNEON_8n
@ special case for r5==8, everything is in register bank...
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
sub r7,sp,r5,lsl#4
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
and r7,r7,#-64
vld1.32 {d30[0]}, [r4,:32]
mov sp,r7 @ alloca
vzip.16 d28,d8
vmull.u32 q6,d28,d0[0]
vmull.u32 q7,d28,d0[1]
vmull.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmull.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
vmul.u32 d29,d29,d30
vmull.u32 q10,d28,d2[0]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmull.u32 q11,d28,d2[1]
vmull.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmull.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
sub r9,r5,#1
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
b .LNEON_outer8
.align 4
.LNEON_outer8:
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
vzip.16 d28,d8
vadd.u64 d12,d12,d10
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
subs r9,r9,#1
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
bne .LNEON_outer8
vadd.u64 d12,d12,d10
mov r7,sp
vshr.u64 d10,d12,#16
mov r8,r5
vadd.u64 d13,d13,d10
add r6,sp,#96
vshr.u64 d10,d13,#16
vzip.16 d12,d13
b .LNEON_tail_entry
.align 4
.LNEON_8n:
veor q6,q6,q6
sub r7,sp,#128
veor q7,q7,q7
sub r7,r7,r5,lsl#4
veor q8,q8,q8
and r7,r7,#-64
veor q9,q9,q9
mov sp,r7 @ alloca
veor q10,q10,q10
add r7,r7,#256
veor q11,q11,q11
sub r8,r5,#8
veor q12,q12,q12
veor q13,q13,q13
.LNEON_8n_init:
vst1.64 {q6,q7},[r7,:256]!
subs r8,r8,#8
vst1.64 {q8,q9},[r7,:256]!
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12,q13},[r7,:256]!
bne .LNEON_8n_init
add r6,sp,#256
vld1.32 {d0,d1,d2,d3},[r1]!
add r10,sp,#8
vld1.32 {d30[0]},[r4,:32]
mov r9,r5
b .LNEON_8n_outer
.align 4
.LNEON_8n_outer:
vld1.32 {d28[0]},[r2,:32]! @ *b++
veor d8,d8,d8
vzip.16 d28,d8
add r7,sp,#128
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
vmlal.u32 q10,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q11,d28,d2[1]
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q6,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q7,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q8,d29,d5[0]
vshr.u64 d12,d12,#16
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vadd.u64 d12,d12,d13
vmlal.u32 q11,d29,d6[1]
vshr.u64 d12,d12,#16
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vadd.u64 d14,d14,d12
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]!
vmlal.u32 q8,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q9,d28,d1[0]
vshl.i64 d29,d15,#16
vmlal.u32 q10,d28,d1[1]
vadd.u64 d29,d29,d14
vmlal.u32 q11,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q12,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
vmlal.u32 q13,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q7,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q8,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q9,d29,d5[0]
vshr.u64 d14,d14,#16
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vadd.u64 d14,d14,d15
vmlal.u32 q12,d29,d6[1]
vshr.u64 d14,d14,#16
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vadd.u64 d16,d16,d14
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]!
vmlal.u32 q9,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q10,d28,d1[0]
vshl.i64 d29,d17,#16
vmlal.u32 q11,d28,d1[1]
vadd.u64 d29,d29,d16
vmlal.u32 q12,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q13,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
vmlal.u32 q6,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q8,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q9,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q10,d29,d5[0]
vshr.u64 d16,d16,#16
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vadd.u64 d16,d16,d17
vmlal.u32 q13,d29,d6[1]
vshr.u64 d16,d16,#16
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vadd.u64 d18,d18,d16
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]!
vmlal.u32 q10,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q11,d28,d1[0]
vshl.i64 d29,d19,#16
vmlal.u32 q12,d28,d1[1]
vadd.u64 d29,d29,d18
vmlal.u32 q13,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q6,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
vmlal.u32 q7,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q9,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q10,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q11,d29,d5[0]
vshr.u64 d18,d18,#16
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vadd.u64 d18,d18,d19
vmlal.u32 q6,d29,d6[1]
vshr.u64 d18,d18,#16
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vadd.u64 d20,d20,d18
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]!
vmlal.u32 q11,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q12,d28,d1[0]
vshl.i64 d29,d21,#16
vmlal.u32 q13,d28,d1[1]
vadd.u64 d29,d29,d20
vmlal.u32 q6,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q7,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
vmlal.u32 q8,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q10,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q11,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q12,d29,d5[0]
vshr.u64 d20,d20,#16
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vadd.u64 d20,d20,d21
vmlal.u32 q7,d29,d6[1]
vshr.u64 d20,d20,#16
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vadd.u64 d22,d22,d20
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]!
vmlal.u32 q12,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q13,d28,d1[0]
vshl.i64 d29,d23,#16
vmlal.u32 q6,d28,d1[1]
vadd.u64 d29,d29,d22
vmlal.u32 q7,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q8,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
vmlal.u32 q9,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q11,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q12,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q13,d29,d5[0]
vshr.u64 d22,d22,#16
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vadd.u64 d22,d22,d23
vmlal.u32 q8,d29,d6[1]
vshr.u64 d22,d22,#16
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vadd.u64 d24,d24,d22
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]!
vmlal.u32 q13,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q6,d28,d1[0]
vshl.i64 d29,d25,#16
vmlal.u32 q7,d28,d1[1]
vadd.u64 d29,d29,d24
vmlal.u32 q8,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q9,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
vmlal.u32 q10,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q12,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q13,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q6,d29,d5[0]
vshr.u64 d24,d24,#16
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vadd.u64 d24,d24,d25
vmlal.u32 q9,d29,d6[1]
vshr.u64 d24,d24,#16
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vadd.u64 d26,d26,d24
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]!
vmlal.u32 q6,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q7,d28,d1[0]
vshl.i64 d29,d27,#16
vmlal.u32 q8,d28,d1[1]
vadd.u64 d29,d29,d26
vmlal.u32 q9,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
vmlal.u32 q11,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q12,d28,d3[1]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q13,d29,d4[0]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q6,d29,d4[1]
vmlal.u32 q7,d29,d5[0]
vshr.u64 d26,d26,#16
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vadd.u64 d26,d26,d27
vmlal.u32 q10,d29,d6[1]
vshr.u64 d26,d26,#16
vmlal.u32 q11,d29,d7[0]
vmlal.u32 q12,d29,d7[1]
vadd.u64 d12,d12,d26
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
add r10,sp,#8 @ rewind
sub r8,r5,#8
b .LNEON_8n_inner
.align 4
.LNEON_8n_inner:
subs r8,r8,#8
vmlal.u32 q6,d28,d0[0]
vld1.64 {q13},[r6,:128]
vmlal.u32 q7,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
vmlal.u32 q8,d28,d1[0]
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q9,d28,d1[1]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmlal.u32 q11,d29,d6[1]
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vst1.64 {q6},[r7,:128]!
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]
vmlal.u32 q8,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
vmlal.u32 q9,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d1[1]
vmlal.u32 q11,d28,d2[0]
vmlal.u32 q12,d28,d2[1]
vmlal.u32 q13,d28,d3[0]
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
vmlal.u32 q7,d29,d4[0]
vmlal.u32 q8,d29,d4[1]
vmlal.u32 q9,d29,d5[0]
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vmlal.u32 q12,d29,d6[1]
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vst1.64 {q7},[r7,:128]!
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]
vmlal.u32 q9,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
vmlal.u32 q10,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q11,d28,d1[1]
vmlal.u32 q12,d28,d2[0]
vmlal.u32 q13,d28,d2[1]
vmlal.u32 q6,d28,d3[0]
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
vmlal.u32 q8,d29,d4[0]
vmlal.u32 q9,d29,d4[1]
vmlal.u32 q10,d29,d5[0]
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vmlal.u32 q13,d29,d6[1]
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vst1.64 {q8},[r7,:128]!
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]
vmlal.u32 q10,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
vmlal.u32 q11,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q12,d28,d1[1]
vmlal.u32 q13,d28,d2[0]
vmlal.u32 q6,d28,d2[1]
vmlal.u32 q7,d28,d3[0]
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
vmlal.u32 q9,d29,d4[0]
vmlal.u32 q10,d29,d4[1]
vmlal.u32 q11,d29,d5[0]
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vmlal.u32 q6,d29,d6[1]
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vst1.64 {q9},[r7,:128]!
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]
vmlal.u32 q11,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
vmlal.u32 q12,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q13,d28,d1[1]
vmlal.u32 q6,d28,d2[0]
vmlal.u32 q7,d28,d2[1]
vmlal.u32 q8,d28,d3[0]
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
vmlal.u32 q10,d29,d4[0]
vmlal.u32 q11,d29,d4[1]
vmlal.u32 q12,d29,d5[0]
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vmlal.u32 q7,d29,d6[1]
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vst1.64 {q10},[r7,:128]!
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]
vmlal.u32 q12,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
vmlal.u32 q13,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q6,d28,d1[1]
vmlal.u32 q7,d28,d2[0]
vmlal.u32 q8,d28,d2[1]
vmlal.u32 q9,d28,d3[0]
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
vmlal.u32 q11,d29,d4[0]
vmlal.u32 q12,d29,d4[1]
vmlal.u32 q13,d29,d5[0]
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vmlal.u32 q8,d29,d6[1]
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vst1.64 {q11},[r7,:128]!
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]
vmlal.u32 q13,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
vmlal.u32 q6,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q7,d28,d1[1]
vmlal.u32 q8,d28,d2[0]
vmlal.u32 q9,d28,d2[1]
vmlal.u32 q10,d28,d3[0]
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
vmlal.u32 q12,d29,d4[0]
vmlal.u32 q13,d29,d4[1]
vmlal.u32 q6,d29,d5[0]
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vmlal.u32 q9,d29,d6[1]
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vst1.64 {q12},[r7,:128]!
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]
vmlal.u32 q6,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
vmlal.u32 q7,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q8,d28,d1[1]
vmlal.u32 q9,d28,d2[0]
vmlal.u32 q10,d28,d2[1]
vmlal.u32 q11,d28,d3[0]
vmlal.u32 q12,d28,d3[1]
it eq
subeq r1,r1,r5,lsl#2 @ rewind
vmlal.u32 q13,d29,d4[0]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q6,d29,d4[1]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q7,d29,d5[0]
add r10,sp,#8 @ rewind
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vmlal.u32 q10,d29,d6[1]
vmlal.u32 q11,d29,d7[0]
vst1.64 {q13},[r7,:128]!
vmlal.u32 q12,d29,d7[1]
bne .LNEON_8n_inner
add r6,sp,#128
vst1.64 {q6,q7},[r7,:256]!
veor q2,q2,q2 @ d4-d5
vst1.64 {q8,q9},[r7,:256]!
veor q3,q3,q3 @ d6-d7
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12},[r7,:128]
subs r9,r9,#8
vld1.64 {q6,q7},[r6,:256]!
vld1.64 {q8,q9},[r6,:256]!
vld1.64 {q10,q11},[r6,:256]!
vld1.64 {q12,q13},[r6,:256]!
itt ne
subne r3,r3,r5,lsl#2 @ rewind
bne .LNEON_8n_outer
add r7,sp,#128
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
vshr.u64 d10,d12,#16
vst1.64 {q2,q3},[sp,:256]!
vadd.u64 d13,d13,d10
vst1.64 {q2,q3}, [sp,:256]!
vshr.u64 d10,d13,#16
vst1.64 {q2,q3}, [sp,:256]!
vzip.16 d12,d13
mov r8,r5
b .LNEON_tail_entry
.align 4
.LNEON_tail:
vadd.u64 d12,d12,d10
vshr.u64 d10,d12,#16
vld1.64 {q8,q9}, [r6, :256]!
vadd.u64 d13,d13,d10
vld1.64 {q10,q11}, [r6, :256]!
vshr.u64 d10,d13,#16
vld1.64 {q12,q13}, [r6, :256]!
vzip.16 d12,d13
.LNEON_tail_entry:
vadd.u64 d14,d14,d10
vst1.32 {d12[0]}, [r7, :32]!
vshr.u64 d10,d14,#16
vadd.u64 d15,d15,d10
vshr.u64 d10,d15,#16
vzip.16 d14,d15
vadd.u64 d16,d16,d10
vst1.32 {d14[0]}, [r7, :32]!
vshr.u64 d10,d16,#16
vadd.u64 d17,d17,d10
vshr.u64 d10,d17,#16
vzip.16 d16,d17
vadd.u64 d18,d18,d10
vst1.32 {d16[0]}, [r7, :32]!
vshr.u64 d10,d18,#16
vadd.u64 d19,d19,d10
vshr.u64 d10,d19,#16
vzip.16 d18,d19
vadd.u64 d20,d20,d10
vst1.32 {d18[0]}, [r7, :32]!
vshr.u64 d10,d20,#16
vadd.u64 d21,d21,d10
vshr.u64 d10,d21,#16
vzip.16 d20,d21
vadd.u64 d22,d22,d10
vst1.32 {d20[0]}, [r7, :32]!
vshr.u64 d10,d22,#16
vadd.u64 d23,d23,d10
vshr.u64 d10,d23,#16
vzip.16 d22,d23
vadd.u64 d24,d24,d10
vst1.32 {d22[0]}, [r7, :32]!
vshr.u64 d10,d24,#16
vadd.u64 d25,d25,d10
vshr.u64 d10,d25,#16
vzip.16 d24,d25
vadd.u64 d26,d26,d10
vst1.32 {d24[0]}, [r7, :32]!
vshr.u64 d10,d26,#16
vadd.u64 d27,d27,d10
vshr.u64 d10,d27,#16
vzip.16 d26,d27
vld1.64 {q6,q7}, [r6, :256]!
subs r8,r8,#8
vst1.32 {d26[0]}, [r7, :32]!
bne .LNEON_tail
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
sub r3,r3,r5,lsl#2 @ rewind r3
subs r1,sp,#0 @ clear carry flag
add r2,sp,r5,lsl#2
.LNEON_sub:
ldmia r1!, {r4,r5,r6,r7}
ldmia r3!, {r8,r9,r10,r11}
sbcs r8, r4,r8
sbcs r9, r5,r9
sbcs r10,r6,r10
sbcs r11,r7,r11
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_sub
ldr r10, [r1] @ load top-most bit
mov r11,sp
veor q0,q0,q0
sub r11,r2,r11 @ this is num*4
veor q1,q1,q1
mov r1,sp
sub r0,r0,r11 @ rewind r0
mov r3,r2 @ second 3/4th of frame
sbcs r10,r10,#0 @ result is carry flag
.LNEON_copy_n_zap:
ldmia r1!, {r4,r5,r6,r7}
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r3,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
ldmia r1, {r4,r5,r6,r7}
stmia r0!, {r8,r9,r10,r11}
sub r1,r1,#16
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r1,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_copy_n_zap
mov sp,ip
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
bx lr @ bx lr
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
#endif
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__arm__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,316 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include "internal.h"
int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
const BIGNUM *tmp;
int a_neg = a->neg, ret;
// a + b a+b
// a + -b a-b
// -a + b b-a
// -a + -b -(a+b)
if (a_neg ^ b->neg) {
// only one is negative
if (a_neg) {
tmp = a;
a = b;
b = tmp;
}
// we are now a - b
if (BN_ucmp(a, b) < 0) {
if (!BN_usub(r, b, a)) {
return 0;
}
r->neg = 1;
} else {
if (!BN_usub(r, a, b)) {
return 0;
}
r->neg = 0;
}
return 1;
}
ret = BN_uadd(r, a, b);
r->neg = a_neg;
return ret;
}
int bn_uadd_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
// Widths are public, so we normalize to make |a| the larger one.
if (a->width < b->width) {
const BIGNUM *tmp = a;
a = b;
b = tmp;
}
int max = a->width;
int min = b->width;
if (!bn_wexpand(r, max + 1)) {
return 0;
}
r->width = max + 1;
BN_ULONG carry = bn_add_words(r->d, a->d, b->d, min);
for (int i = min; i < max; i++) {
// |r| and |a| may alias, so use a temporary.
BN_ULONG tmp = carry + a->d[i];
carry = tmp < a->d[i];
r->d[i] = tmp;
}
r->d[max] = carry;
return 1;
}
int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
if (!bn_uadd_consttime(r, a, b)) {
return 0;
}
bn_set_minimal_width(r);
return 1;
}
int BN_add_word(BIGNUM *a, BN_ULONG w) {
BN_ULONG l;
int i;
// degenerate case: w is zero
if (!w) {
return 1;
}
// degenerate case: a is zero
if (BN_is_zero(a)) {
return BN_set_word(a, w);
}
// handle 'a' when negative
if (a->neg) {
a->neg = 0;
i = BN_sub_word(a, w);
if (!BN_is_zero(a)) {
a->neg = !(a->neg);
}
return i;
}
for (i = 0; w != 0 && i < a->width; i++) {
a->d[i] = l = a->d[i] + w;
w = (w > l) ? 1 : 0;
}
if (w && i == a->width) {
if (!bn_wexpand(a, a->width + 1)) {
return 0;
}
a->width++;
a->d[i] = w;
}
return 1;
}
int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
int add = 0, neg = 0;
const BIGNUM *tmp;
// a - b a-b
// a - -b a+b
// -a - b -(a+b)
// -a - -b b-a
if (a->neg) {
if (b->neg) {
tmp = a;
a = b;
b = tmp;
} else {
add = 1;
neg = 1;
}
} else {
if (b->neg) {
add = 1;
neg = 0;
}
}
if (add) {
if (!BN_uadd(r, a, b)) {
return 0;
}
r->neg = neg;
return 1;
}
if (BN_ucmp(a, b) < 0) {
if (!BN_usub(r, b, a)) {
return 0;
}
r->neg = 1;
} else {
if (!BN_usub(r, a, b)) {
return 0;
}
r->neg = 0;
}
return 1;
}
int bn_usub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
// |b| may have more words than |a| given non-minimal inputs, but all words
// beyond |a->width| must then be zero.
int b_width = b->width;
if (b_width > a->width) {
if (!bn_fits_in_words(b, a->width)) {
OPENSSL_PUT_ERROR(BN, BN_R_ARG2_LT_ARG3);
return 0;
}
b_width = a->width;
}
if (!bn_wexpand(r, a->width)) {
return 0;
}
BN_ULONG borrow = bn_sub_words(r->d, a->d, b->d, b_width);
for (int i = b_width; i < a->width; i++) {
// |r| and |a| may alias, so use a temporary.
BN_ULONG tmp = a->d[i];
r->d[i] = a->d[i] - borrow;
borrow = tmp < r->d[i];
}
if (borrow) {
OPENSSL_PUT_ERROR(BN, BN_R_ARG2_LT_ARG3);
return 0;
}
r->width = a->width;
r->neg = 0;
return 1;
}
int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
if (!bn_usub_consttime(r, a, b)) {
return 0;
}
bn_set_minimal_width(r);
return 1;
}
int BN_sub_word(BIGNUM *a, BN_ULONG w) {
int i;
// degenerate case: w is zero
if (!w) {
return 1;
}
// degenerate case: a is zero
if (BN_is_zero(a)) {
i = BN_set_word(a, w);
if (i != 0) {
BN_set_negative(a, 1);
}
return i;
}
// handle 'a' when negative
if (a->neg) {
a->neg = 0;
i = BN_add_word(a, w);
a->neg = 1;
return i;
}
if ((bn_minimal_width(a) == 1) && (a->d[0] < w)) {
a->d[0] = w - a->d[0];
a->neg = 1;
return 1;
}
i = 0;
for (;;) {
if (a->d[i] >= w) {
a->d[i] -= w;
break;
} else {
a->d[i] -= w;
i++;
w = 1;
}
}
if ((a->d[i] == 0) && (i == (a->width - 1))) {
a->width--;
}
return 1;
}
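The vendored add.c above reduces signed BN_add/BN_sub to the unsigned helpers BN_uadd/BN_usub plus sign bookkeeping. As a rough, hypothetical sketch (not part of this commit), Swift code could call the same routine through the prefixed symbols, assuming the BORINGSSL_PREFIX define shown earlier renames BN_add to CBigNumBoringSSL_BN_add and that BN_new, BN_free, and BN_set_word are exported under the same prefix:

import CBigNumBoringSSL

// Hypothetical usage sketch; symbol names assume the CBigNumBoringSSL_ prefix
// applied by BORINGSSL_PREFIX in this vendored copy.
func demoAdd() -> Bool {
    // BN_new may return nil on allocation failure.
    guard let a = CBigNumBoringSSL_BN_new(),
          let b = CBigNumBoringSSL_BN_new(),
          let r = CBigNumBoringSSL_BN_new() else { return false }
    defer {
        CBigNumBoringSSL_BN_free(a)
        CBigNumBoringSSL_BN_free(b)
        CBigNumBoringSSL_BN_free(r)
    }
    CBigNumBoringSSL_BN_set_word(a, 40)  // a = 40
    CBigNumBoringSSL_BN_set_word(b, 2)   // b = 2
    // BN_add writes a + b into r and returns 1 on success, 0 on failure.
    return CBigNumBoringSSL_BN_add(r, a, b) == 1
}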

View File

@@ -0,0 +1,541 @@
/* x86_64 BIGNUM accelerator version 0.1, December 2002.
*
* Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
* project.
*
* Rights for redistribution and usage in source and binary forms are
* granted according to the OpenSSL license. Warranty of any kind is
* disclaimed.
*
* Q. Version 0.1? It doesn't sound like Andy, he used to assign real
* versions, like 1.0...
* A. Well, that's because this code is basically a quick-n-dirty
* proof-of-concept hack. As you can see it's implemented with
* inline assembler, which means that you're bound to GCC and that
* there might be enough room for further improvement.
*
* Q. Why inline assembler?
* A. x86_64 features its own ABI, which I'm not familiar with. This is
* why I decided to let the compiler take care of the subroutine
* prologue/epilogue as well as register allocation. For reference,
* Win64 implements a different ABI for AMD64 than Linux does.
*
* Q. How much faster does it get?
* A. 'apps/openssl speed rsa dsa' output with no-asm:
*
* sign verify sign/s verify/s
* rsa 512 bits 0.0006s 0.0001s 1683.8 18456.2
* rsa 1024 bits 0.0028s 0.0002s 356.0 6407.0
* rsa 2048 bits 0.0172s 0.0005s 58.0 1957.8
* rsa 4096 bits 0.1155s 0.0018s 8.7 555.6
* sign verify sign/s verify/s
* dsa 512 bits 0.0005s 0.0006s 2100.8 1768.3
* dsa 1024 bits 0.0014s 0.0018s 692.3 559.2
* dsa 2048 bits 0.0049s 0.0061s 204.7 165.0
*
* 'apps/openssl speed rsa dsa' output with this module:
*
* sign verify sign/s verify/s
* rsa 512 bits 0.0004s 0.0000s 2767.1 33297.9
* rsa 1024 bits 0.0012s 0.0001s 867.4 14674.7
* rsa 2048 bits 0.0061s 0.0002s 164.0 5270.0
* rsa 4096 bits 0.0384s 0.0006s 26.1 1650.8
* sign verify sign/s verify/s
* dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3
* dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4
* dsa 2048 bits 0.0016s 0.0020s 620.4 504.6
*
* For the reference. IA-32 assembler implementation performs
* very much like 64-bit code compiled with no-asm on the same
* machine.
*/
#include <CBigNumBoringSSL_bn.h>
// TODO(davidben): Get this file working on MSVC x64.
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
(defined(__GNUC__) || defined(__clang__))
#include "../internal.h"
#undef mul
#undef mul_add
// "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
// "g"(0) let the compiler to decide where does it
// want to keep the value of zero;
#define mul_add(r, a, word, carry) \
do { \
register BN_ULONG high, low; \
__asm__("mulq %3" : "=a"(low), "=d"(high) : "a"(word), "m"(a) : "cc"); \
__asm__("addq %2,%0; adcq %3,%1" \
: "+r"(carry), "+d"(high) \
: "a"(low), "g"(0) \
: "cc"); \
__asm__("addq %2,%0; adcq %3,%1" \
: "+m"(r), "+d"(high) \
: "r"(carry), "g"(0) \
: "cc"); \
(carry) = high; \
} while (0)
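// Editorial sketch, not part of the vendored source: what the mul_add macro
// above computes, written portably. Assumes a 64-bit BN_ULONG and a compiler
// that provides unsigned __int128.
#if 0  // illustrative only
static inline void mul_add_portable(BN_ULONG *r, BN_ULONG a, BN_ULONG word,
                                    BN_ULONG *carry) {
  unsigned __int128 t = (unsigned __int128)a * word + *r + *carry;
  *r = (BN_ULONG)t;              // low 64 bits go back into the result word
  *carry = (BN_ULONG)(t >> 64);  // high 64 bits become the carry into the next word
}
#endif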
#define mul(r, a, word, carry) \
do { \
register BN_ULONG high, low; \
__asm__("mulq %3" : "=a"(low), "=d"(high) : "a"(word), "g"(a) : "cc"); \
__asm__("addq %2,%0; adcq %3,%1" \
: "+r"(carry), "+d"(high) \
: "a"(low), "g"(0) \
: "cc"); \
(r) = (carry); \
(carry) = high; \
} while (0)
#undef sqr
#define sqr(r0, r1, a) __asm__("mulq %2" : "=a"(r0), "=d"(r1) : "a"(a) : "cc");
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
BN_ULONG w) {
BN_ULONG c1 = 0;
if (num == 0) {
return (c1);
}
while (num & ~3) {
mul_add(rp[0], ap[0], w, c1);
mul_add(rp[1], ap[1], w, c1);
mul_add(rp[2], ap[2], w, c1);
mul_add(rp[3], ap[3], w, c1);
ap += 4;
rp += 4;
num -= 4;
}
if (num) {
mul_add(rp[0], ap[0], w, c1);
if (--num == 0) {
return c1;
}
mul_add(rp[1], ap[1], w, c1);
if (--num == 0) {
return c1;
}
mul_add(rp[2], ap[2], w, c1);
return c1;
}
return c1;
}
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
BN_ULONG w) {
BN_ULONG c1 = 0;
if (num == 0) {
return c1;
}
while (num & ~3) {
mul(rp[0], ap[0], w, c1);
mul(rp[1], ap[1], w, c1);
mul(rp[2], ap[2], w, c1);
mul(rp[3], ap[3], w, c1);
ap += 4;
rp += 4;
num -= 4;
}
if (num) {
mul(rp[0], ap[0], w, c1);
if (--num == 0) {
return c1;
}
mul(rp[1], ap[1], w, c1);
if (--num == 0) {
return c1;
}
mul(rp[2], ap[2], w, c1);
}
return c1;
}
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, size_t n) {
if (n == 0) {
return;
}
while (n & ~3) {
sqr(r[0], r[1], a[0]);
sqr(r[2], r[3], a[1]);
sqr(r[4], r[5], a[2]);
sqr(r[6], r[7], a[3]);
a += 4;
r += 8;
n -= 4;
}
if (n) {
sqr(r[0], r[1], a[0]);
if (--n == 0) {
return;
}
sqr(r[2], r[3], a[1]);
if (--n == 0) {
return;
}
sqr(r[4], r[5], a[2]);
}
}
BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
size_t n) {
BN_ULONG ret;
size_t i = 0;
if (n == 0) {
return 0;
}
__asm__ volatile (
" subq %0,%0 \n" // clear carry
" jmp 1f \n"
".p2align 4 \n"
"1:"
" movq (%4,%2,8),%0 \n"
" adcq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n"
" lea 1(%2),%2 \n"
" dec %1 \n"
" jnz 1b \n"
" sbbq %0,%0 \n"
: "=&r"(ret), "+c"(n), "+r"(i)
: "r"(rp), "r"(ap), "r"(bp)
: "cc", "memory");
return ret & 1;
}
BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
size_t n) {
BN_ULONG ret;
size_t i = 0;
if (n == 0) {
return 0;
}
__asm__ volatile (
" subq %0,%0 \n" // clear borrow
" jmp 1f \n"
".p2align 4 \n"
"1:"
" movq (%4,%2,8),%0 \n"
" sbbq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n"
" lea 1(%2),%2 \n"
" dec %1 \n"
" jnz 1b \n"
" sbbq %0,%0 \n"
: "=&r"(ret), "+c"(n), "+r"(i)
: "r"(rp), "r"(ap), "r"(bp)
: "cc", "memory");
return ret & 1;
}
// mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0)
// mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0)
// sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0)
// sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)
// Keep in mind that the carry into the high part of the multiplication result
// cannot overflow, because that high part can never be all ones.
#define mul_add_c(a, b, c0, c1, c2) \
do { \
BN_ULONG t1, t2; \
__asm__("mulq %3" : "=a"(t1), "=d"(t2) : "a"(a), "m"(b) : "cc"); \
__asm__("addq %3,%0; adcq %4,%1; adcq %5,%2" \
: "+r"(c0), "+r"(c1), "+r"(c2) \
: "r"(t1), "r"(t2), "g"(0) \
: "cc"); \
} while (0)
#define sqr_add_c(a, i, c0, c1, c2) \
do { \
BN_ULONG t1, t2; \
__asm__("mulq %2" : "=a"(t1), "=d"(t2) : "a"((a)[i]) : "cc"); \
__asm__("addq %3,%0; adcq %4,%1; adcq %5,%2" \
: "+r"(c0), "+r"(c1), "+r"(c2) \
: "r"(t1), "r"(t2), "g"(0) \
: "cc"); \
} while (0)
#define mul_add_c2(a, b, c0, c1, c2) \
do { \
BN_ULONG t1, t2; \
__asm__("mulq %3" : "=a"(t1), "=d"(t2) : "a"(a), "m"(b) : "cc"); \
__asm__("addq %3,%0; adcq %4,%1; adcq %5,%2" \
: "+r"(c0), "+r"(c1), "+r"(c2) \
: "r"(t1), "r"(t2), "g"(0) \
: "cc"); \
__asm__("addq %3,%0; adcq %4,%1; adcq %5,%2" \
: "+r"(c0), "+r"(c1), "+r"(c2) \
: "r"(t1), "r"(t2), "g"(0) \
: "cc"); \
} while (0)
#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
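// Editorial sketch, not part of the vendored source: the three-word
// accumulator maintained by mul_add_c, with the no-overflow argument from the
// comment above spelled out. With 64-bit words, a*b <= (2^64 - 1)^2 =
// 2^128 - 2^65 + 1, so the high word of the product is at most 2^64 - 2 and
// absorbing one more carry cannot wrap it. Assumes unsigned __int128 support.
#if 0  // illustrative only
static inline void mul_add_c_portable(BN_ULONG a, BN_ULONG b, BN_ULONG *c0,
                                      BN_ULONG *c1, BN_ULONG *c2) {
  unsigned __int128 t = (unsigned __int128)a * b;
  BN_ULONG lo = (BN_ULONG)t;
  BN_ULONG hi = (BN_ULONG)(t >> 64);
  *c0 += lo;
  hi += (*c0 < lo);    // carry out of c0; cannot overflow hi (see above)
  *c1 += hi;
  *c2 += (*c1 < hi);   // carry out of c1 propagates into c2
}
#endif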
void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
mul_add_c(a[0], b[0], c1, c2, c3);
r[0] = c1;
c1 = 0;
mul_add_c(a[0], b[1], c2, c3, c1);
mul_add_c(a[1], b[0], c2, c3, c1);
r[1] = c2;
c2 = 0;
mul_add_c(a[2], b[0], c3, c1, c2);
mul_add_c(a[1], b[1], c3, c1, c2);
mul_add_c(a[0], b[2], c3, c1, c2);
r[2] = c3;
c3 = 0;
mul_add_c(a[0], b[3], c1, c2, c3);
mul_add_c(a[1], b[2], c1, c2, c3);
mul_add_c(a[2], b[1], c1, c2, c3);
mul_add_c(a[3], b[0], c1, c2, c3);
r[3] = c1;
c1 = 0;
mul_add_c(a[4], b[0], c2, c3, c1);
mul_add_c(a[3], b[1], c2, c3, c1);
mul_add_c(a[2], b[2], c2, c3, c1);
mul_add_c(a[1], b[3], c2, c3, c1);
mul_add_c(a[0], b[4], c2, c3, c1);
r[4] = c2;
c2 = 0;
mul_add_c(a[0], b[5], c3, c1, c2);
mul_add_c(a[1], b[4], c3, c1, c2);
mul_add_c(a[2], b[3], c3, c1, c2);
mul_add_c(a[3], b[2], c3, c1, c2);
mul_add_c(a[4], b[1], c3, c1, c2);
mul_add_c(a[5], b[0], c3, c1, c2);
r[5] = c3;
c3 = 0;
mul_add_c(a[6], b[0], c1, c2, c3);
mul_add_c(a[5], b[1], c1, c2, c3);
mul_add_c(a[4], b[2], c1, c2, c3);
mul_add_c(a[3], b[3], c1, c2, c3);
mul_add_c(a[2], b[4], c1, c2, c3);
mul_add_c(a[1], b[5], c1, c2, c3);
mul_add_c(a[0], b[6], c1, c2, c3);
r[6] = c1;
c1 = 0;
mul_add_c(a[0], b[7], c2, c3, c1);
mul_add_c(a[1], b[6], c2, c3, c1);
mul_add_c(a[2], b[5], c2, c3, c1);
mul_add_c(a[3], b[4], c2, c3, c1);
mul_add_c(a[4], b[3], c2, c3, c1);
mul_add_c(a[5], b[2], c2, c3, c1);
mul_add_c(a[6], b[1], c2, c3, c1);
mul_add_c(a[7], b[0], c2, c3, c1);
r[7] = c2;
c2 = 0;
mul_add_c(a[7], b[1], c3, c1, c2);
mul_add_c(a[6], b[2], c3, c1, c2);
mul_add_c(a[5], b[3], c3, c1, c2);
mul_add_c(a[4], b[4], c3, c1, c2);
mul_add_c(a[3], b[5], c3, c1, c2);
mul_add_c(a[2], b[6], c3, c1, c2);
mul_add_c(a[1], b[7], c3, c1, c2);
r[8] = c3;
c3 = 0;
mul_add_c(a[2], b[7], c1, c2, c3);
mul_add_c(a[3], b[6], c1, c2, c3);
mul_add_c(a[4], b[5], c1, c2, c3);
mul_add_c(a[5], b[4], c1, c2, c3);
mul_add_c(a[6], b[3], c1, c2, c3);
mul_add_c(a[7], b[2], c1, c2, c3);
r[9] = c1;
c1 = 0;
mul_add_c(a[7], b[3], c2, c3, c1);
mul_add_c(a[6], b[4], c2, c3, c1);
mul_add_c(a[5], b[5], c2, c3, c1);
mul_add_c(a[4], b[6], c2, c3, c1);
mul_add_c(a[3], b[7], c2, c3, c1);
r[10] = c2;
c2 = 0;
mul_add_c(a[4], b[7], c3, c1, c2);
mul_add_c(a[5], b[6], c3, c1, c2);
mul_add_c(a[6], b[5], c3, c1, c2);
mul_add_c(a[7], b[4], c3, c1, c2);
r[11] = c3;
c3 = 0;
mul_add_c(a[7], b[5], c1, c2, c3);
mul_add_c(a[6], b[6], c1, c2, c3);
mul_add_c(a[5], b[7], c1, c2, c3);
r[12] = c1;
c1 = 0;
mul_add_c(a[6], b[7], c2, c3, c1);
mul_add_c(a[7], b[6], c2, c3, c1);
r[13] = c2;
c2 = 0;
mul_add_c(a[7], b[7], c3, c1, c2);
r[14] = c3;
r[15] = c1;
}
void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
mul_add_c(a[0], b[0], c1, c2, c3);
r[0] = c1;
c1 = 0;
mul_add_c(a[0], b[1], c2, c3, c1);
mul_add_c(a[1], b[0], c2, c3, c1);
r[1] = c2;
c2 = 0;
mul_add_c(a[2], b[0], c3, c1, c2);
mul_add_c(a[1], b[1], c3, c1, c2);
mul_add_c(a[0], b[2], c3, c1, c2);
r[2] = c3;
c3 = 0;
mul_add_c(a[0], b[3], c1, c2, c3);
mul_add_c(a[1], b[2], c1, c2, c3);
mul_add_c(a[2], b[1], c1, c2, c3);
mul_add_c(a[3], b[0], c1, c2, c3);
r[3] = c1;
c1 = 0;
mul_add_c(a[3], b[1], c2, c3, c1);
mul_add_c(a[2], b[2], c2, c3, c1);
mul_add_c(a[1], b[3], c2, c3, c1);
r[4] = c2;
c2 = 0;
mul_add_c(a[2], b[3], c3, c1, c2);
mul_add_c(a[3], b[2], c3, c1, c2);
r[5] = c3;
c3 = 0;
mul_add_c(a[3], b[3], c1, c2, c3);
r[6] = c1;
r[7] = c2;
}
void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
sqr_add_c(a, 0, c1, c2, c3);
r[0] = c1;
c1 = 0;
sqr_add_c2(a, 1, 0, c2, c3, c1);
r[1] = c2;
c2 = 0;
sqr_add_c(a, 1, c3, c1, c2);
sqr_add_c2(a, 2, 0, c3, c1, c2);
r[2] = c3;
c3 = 0;
sqr_add_c2(a, 3, 0, c1, c2, c3);
sqr_add_c2(a, 2, 1, c1, c2, c3);
r[3] = c1;
c1 = 0;
sqr_add_c(a, 2, c2, c3, c1);
sqr_add_c2(a, 3, 1, c2, c3, c1);
sqr_add_c2(a, 4, 0, c2, c3, c1);
r[4] = c2;
c2 = 0;
sqr_add_c2(a, 5, 0, c3, c1, c2);
sqr_add_c2(a, 4, 1, c3, c1, c2);
sqr_add_c2(a, 3, 2, c3, c1, c2);
r[5] = c3;
c3 = 0;
sqr_add_c(a, 3, c1, c2, c3);
sqr_add_c2(a, 4, 2, c1, c2, c3);
sqr_add_c2(a, 5, 1, c1, c2, c3);
sqr_add_c2(a, 6, 0, c1, c2, c3);
r[6] = c1;
c1 = 0;
sqr_add_c2(a, 7, 0, c2, c3, c1);
sqr_add_c2(a, 6, 1, c2, c3, c1);
sqr_add_c2(a, 5, 2, c2, c3, c1);
sqr_add_c2(a, 4, 3, c2, c3, c1);
r[7] = c2;
c2 = 0;
sqr_add_c(a, 4, c3, c1, c2);
sqr_add_c2(a, 5, 3, c3, c1, c2);
sqr_add_c2(a, 6, 2, c3, c1, c2);
sqr_add_c2(a, 7, 1, c3, c1, c2);
r[8] = c3;
c3 = 0;
sqr_add_c2(a, 7, 2, c1, c2, c3);
sqr_add_c2(a, 6, 3, c1, c2, c3);
sqr_add_c2(a, 5, 4, c1, c2, c3);
r[9] = c1;
c1 = 0;
sqr_add_c(a, 5, c2, c3, c1);
sqr_add_c2(a, 6, 4, c2, c3, c1);
sqr_add_c2(a, 7, 3, c2, c3, c1);
r[10] = c2;
c2 = 0;
sqr_add_c2(a, 7, 4, c3, c1, c2);
sqr_add_c2(a, 6, 5, c3, c1, c2);
r[11] = c3;
c3 = 0;
sqr_add_c(a, 6, c1, c2, c3);
sqr_add_c2(a, 7, 5, c1, c2, c3);
r[12] = c1;
c1 = 0;
sqr_add_c2(a, 7, 6, c2, c3, c1);
r[13] = c2;
c2 = 0;
sqr_add_c(a, 7, c3, c1, c2);
r[14] = c3;
r[15] = c1;
}
void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
sqr_add_c(a, 0, c1, c2, c3);
r[0] = c1;
c1 = 0;
sqr_add_c2(a, 1, 0, c2, c3, c1);
r[1] = c2;
c2 = 0;
sqr_add_c(a, 1, c3, c1, c2);
sqr_add_c2(a, 2, 0, c3, c1, c2);
r[2] = c3;
c3 = 0;
sqr_add_c2(a, 3, 0, c1, c2, c3);
sqr_add_c2(a, 2, 1, c1, c2, c3);
r[3] = c1;
c1 = 0;
sqr_add_c(a, 2, c2, c3, c1);
sqr_add_c2(a, 3, 1, c2, c3, c1);
r[4] = c2;
c2 = 0;
sqr_add_c2(a, 3, 2, c3, c1, c2);
r[5] = c3;
c3 = 0;
sqr_add_c(a, 3, c1, c2, c3);
r[6] = c1;
r[7] = c2;
}
#undef mul_add
#undef mul
#undef sqr
#undef mul_add_c
#undef sqr_add_c
#undef mul_add_c2
#undef sqr_add_c2
#endif // !NO_ASM && X86_64 && (__GNUC__ || __clang__)

View File

@ -0,0 +1,445 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <limits.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include "internal.h"
#include "../delocate.h"
BIGNUM *BN_new(void) {
BIGNUM *bn = OPENSSL_malloc(sizeof(BIGNUM));
if (bn == NULL) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
return NULL;
}
OPENSSL_memset(bn, 0, sizeof(BIGNUM));
bn->flags = BN_FLG_MALLOCED;
return bn;
}
void BN_init(BIGNUM *bn) {
OPENSSL_memset(bn, 0, sizeof(BIGNUM));
}
void BN_free(BIGNUM *bn) {
if (bn == NULL) {
return;
}
if ((bn->flags & BN_FLG_STATIC_DATA) == 0) {
OPENSSL_free(bn->d);
}
if (bn->flags & BN_FLG_MALLOCED) {
OPENSSL_free(bn);
} else {
bn->d = NULL;
}
}
void BN_clear_free(BIGNUM *bn) {
char should_free;
if (bn == NULL) {
return;
}
if (bn->d != NULL) {
if ((bn->flags & BN_FLG_STATIC_DATA) == 0) {
OPENSSL_free(bn->d);
} else {
OPENSSL_cleanse(bn->d, bn->dmax * sizeof(bn->d[0]));
}
}
should_free = (bn->flags & BN_FLG_MALLOCED) != 0;
if (should_free) {
OPENSSL_free(bn);
} else {
OPENSSL_cleanse(bn, sizeof(BIGNUM));
}
}
BIGNUM *BN_dup(const BIGNUM *src) {
BIGNUM *copy;
if (src == NULL) {
return NULL;
}
copy = BN_new();
if (copy == NULL) {
return NULL;
}
if (!BN_copy(copy, src)) {
BN_free(copy);
return NULL;
}
return copy;
}
BIGNUM *BN_copy(BIGNUM *dest, const BIGNUM *src) {
if (src == dest) {
return dest;
}
if (!bn_wexpand(dest, src->width)) {
return NULL;
}
OPENSSL_memcpy(dest->d, src->d, sizeof(src->d[0]) * src->width);
dest->width = src->width;
dest->neg = src->neg;
return dest;
}
void BN_clear(BIGNUM *bn) {
if (bn->d != NULL) {
OPENSSL_memset(bn->d, 0, bn->dmax * sizeof(bn->d[0]));
}
bn->width = 0;
bn->neg = 0;
}
DEFINE_METHOD_FUNCTION(BIGNUM, BN_value_one) {
static const BN_ULONG kOneLimbs[1] = { 1 };
out->d = (BN_ULONG*) kOneLimbs;
out->width = 1;
out->dmax = 1;
out->neg = 0;
out->flags = BN_FLG_STATIC_DATA;
}
// BN_num_bits_word returns the minimum number of bits needed to represent the
// value in |l|.
unsigned BN_num_bits_word(BN_ULONG l) {
// |BN_num_bits| is often called on RSA prime factors. These have public bit
// lengths, but all bits beyond the high bit are secret, so count bits in
// constant time.
BN_ULONG x, mask;
int bits = (l != 0);
#if BN_BITS2 > 32
// Look at the upper half of |x|. |x| is at most 64 bits long.
x = l >> 32;
// Set |mask| to all ones if |x| (the top 32 bits of |l|) is non-zero and to
// all zeros otherwise.
mask = 0u - x;
mask = (0u - (mask >> (BN_BITS2 - 1)));
// If |x| is non-zero, the lower half is included in the bit count in full,
// and we count the upper half. Otherwise, we count the lower half.
bits += 32 & mask;
l ^= (x ^ l) & mask; // |l| is |x| if |mask| and remains |l| otherwise.
#endif
// The remaining blocks are analogous iterations at lower powers of two.
x = l >> 16;
mask = 0u - x;
mask = (0u - (mask >> (BN_BITS2 - 1)));
bits += 16 & mask;
l ^= (x ^ l) & mask;
x = l >> 8;
mask = 0u - x;
mask = (0u - (mask >> (BN_BITS2 - 1)));
bits += 8 & mask;
l ^= (x ^ l) & mask;
x = l >> 4;
mask = 0u - x;
mask = (0u - (mask >> (BN_BITS2 - 1)));
bits += 4 & mask;
l ^= (x ^ l) & mask;
x = l >> 2;
mask = 0u - x;
mask = (0u - (mask >> (BN_BITS2 - 1)));
bits += 2 & mask;
l ^= (x ^ l) & mask;
x = l >> 1;
mask = 0u - x;
mask = (0u - (mask >> (BN_BITS2 - 1)));
bits += 1 & mask;
return bits;
}
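// Editorial example, not part of the vendored source: for l = 0x500
// (decimal 1280), only the ">> 8" and ">> 2" halving steps above fire,
// adding 8 and 2 to the initial (l != 0) bit for a total of 11 bits.
#if 0  // illustrative only; assumes <assert.h> is available
static void example_num_bits_word(void) {
  assert(BN_num_bits_word(0) == 0);
  assert(BN_num_bits_word(1) == 1);
  assert(BN_num_bits_word(0x500) == 11);  // 2^10 <= 0x500 < 2^11
}
#endif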
unsigned BN_num_bits(const BIGNUM *bn) {
const int width = bn_minimal_width(bn);
if (width == 0) {
return 0;
}
return (width - 1) * BN_BITS2 + BN_num_bits_word(bn->d[width - 1]);
}
unsigned BN_num_bytes(const BIGNUM *bn) {
return (BN_num_bits(bn) + 7) / 8;
}
void BN_zero(BIGNUM *bn) {
bn->width = bn->neg = 0;
}
int BN_one(BIGNUM *bn) {
return BN_set_word(bn, 1);
}
int BN_set_word(BIGNUM *bn, BN_ULONG value) {
if (value == 0) {
BN_zero(bn);
return 1;
}
if (!bn_wexpand(bn, 1)) {
return 0;
}
bn->neg = 0;
bn->d[0] = value;
bn->width = 1;
return 1;
}
int BN_set_u64(BIGNUM *bn, uint64_t value) {
#if BN_BITS2 == 64
return BN_set_word(bn, value);
#elif BN_BITS2 == 32
if (value <= BN_MASK2) {
return BN_set_word(bn, (BN_ULONG)value);
}
if (!bn_wexpand(bn, 2)) {
return 0;
}
bn->neg = 0;
bn->d[0] = (BN_ULONG)value;
bn->d[1] = (BN_ULONG)(value >> 32);
bn->width = 2;
return 1;
#else
#error "BN_BITS2 must be 32 or 64."
#endif
}
int bn_set_words(BIGNUM *bn, const BN_ULONG *words, size_t num) {
if (!bn_wexpand(bn, num)) {
return 0;
}
OPENSSL_memmove(bn->d, words, num * sizeof(BN_ULONG));
// |bn_wexpand| verified that |num| isn't too large.
bn->width = (int)num;
bn->neg = 0;
return 1;
}
int bn_fits_in_words(const BIGNUM *bn, size_t num) {
// All words beyond |num| must be zero.
BN_ULONG mask = 0;
for (size_t i = num; i < (size_t)bn->width; i++) {
mask |= bn->d[i];
}
return mask == 0;
}
int bn_copy_words(BN_ULONG *out, size_t num, const BIGNUM *bn) {
if (bn->neg) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
size_t width = (size_t)bn->width;
if (width > num) {
if (!bn_fits_in_words(bn, num)) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
return 0;
}
width = num;
}
OPENSSL_memset(out, 0, sizeof(BN_ULONG) * num);
OPENSSL_memcpy(out, bn->d, sizeof(BN_ULONG) * width);
return 1;
}
int BN_is_negative(const BIGNUM *bn) {
return bn->neg != 0;
}
void BN_set_negative(BIGNUM *bn, int sign) {
if (sign && !BN_is_zero(bn)) {
bn->neg = 1;
} else {
bn->neg = 0;
}
}
int bn_wexpand(BIGNUM *bn, size_t words) {
BN_ULONG *a;
if (words <= (size_t)bn->dmax) {
return 1;
}
if (words > (INT_MAX / (4 * BN_BITS2))) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
return 0;
}
if (bn->flags & BN_FLG_STATIC_DATA) {
OPENSSL_PUT_ERROR(BN, BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
return 0;
}
a = OPENSSL_malloc(sizeof(BN_ULONG) * words);
if (a == NULL) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
return 0;
}
OPENSSL_memcpy(a, bn->d, sizeof(BN_ULONG) * bn->width);
OPENSSL_free(bn->d);
bn->d = a;
bn->dmax = (int)words;
return 1;
}
int bn_expand(BIGNUM *bn, size_t bits) {
if (bits + BN_BITS2 - 1 < bits) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
return 0;
}
return bn_wexpand(bn, (bits+BN_BITS2-1)/BN_BITS2);
}
int bn_resize_words(BIGNUM *bn, size_t words) {
#if defined(OPENSSL_PPC64LE)
// This is a workaround for a miscompilation bug in Clang 7.0.1 on POWER.
// The unittests catch the miscompilation, if it occurs, and it manifests
// as a crash in |bn_fits_in_words|.
//
// The bug only triggers if building in FIPS mode and with -O3. Clang 8.0.1
// has the same bug but this workaround is not effective there---I've not
// been able to find a workaround for 8.0.1.
//
// At the time of writing (2019-08-08), Clang git does *not* have this bug
// and does not need this workaround. The current git version should go on to
// become Clang 10; once we can depend on that, this can be removed.
if (value_barrier_w((size_t)bn->width == words)) {
return 1;
}
#endif
if ((size_t)bn->width <= words) {
if (!bn_wexpand(bn, words)) {
return 0;
}
OPENSSL_memset(bn->d + bn->width, 0,
(words - bn->width) * sizeof(BN_ULONG));
bn->width = words;
return 1;
}
// All words beyond the new width must be zero.
if (!bn_fits_in_words(bn, words)) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
return 0;
}
bn->width = words;
return 1;
}
void bn_select_words(BN_ULONG *r, BN_ULONG mask, const BN_ULONG *a,
const BN_ULONG *b, size_t num) {
for (size_t i = 0; i < num; i++) {
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
r[i] = constant_time_select_w(mask, a[i], b[i]);
}
}
int bn_minimal_width(const BIGNUM *bn) {
int ret = bn->width;
while (ret > 0 && bn->d[ret - 1] == 0) {
ret--;
}
return ret;
}
void bn_set_minimal_width(BIGNUM *bn) {
bn->width = bn_minimal_width(bn);
if (bn->width == 0) {
bn->neg = 0;
}
}

View File

@ -0,0 +1,230 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include <limits.h>
#include "internal.h"
BIGNUM *BN_bin2bn(const uint8_t *in, size_t len, BIGNUM *ret) {
size_t num_words;
unsigned m;
BN_ULONG word = 0;
BIGNUM *bn = NULL;
if (ret == NULL) {
ret = bn = BN_new();
}
if (ret == NULL) {
return NULL;
}
if (len == 0) {
ret->width = 0;
return ret;
}
num_words = ((len - 1) / BN_BYTES) + 1;
m = (len - 1) % BN_BYTES;
if (!bn_wexpand(ret, num_words)) {
if (bn) {
BN_free(bn);
}
return NULL;
}
// |bn_wexpand| must check bounds on |num_words| to write it into
// |ret->dmax|.
assert(num_words <= INT_MAX);
ret->width = (int)num_words;
ret->neg = 0;
while (len--) {
word = (word << 8) | *(in++);
if (m-- == 0) {
ret->d[--num_words] = word;
word = 0;
m = BN_BYTES - 1;
}
}
return ret;
}
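// Editorial example, not part of the vendored source: how the big-endian
// byte loop above packs bytes into words on a 64-bit build (BN_BYTES == 8).
// A 9-byte input needs two words: the leading byte lands in d[1] and the
// remaining eight bytes fill d[0]. Error checks are omitted.
#if 0  // illustrative only
static void example_bin2bn(void) {
  static const uint8_t in[9] = {0x01, 0xAA, 0xAA, 0xAA, 0xAA,
                                0xAA, 0xAA, 0xAA, 0xAA};
  BIGNUM *bn = BN_bin2bn(in, sizeof(in), NULL);
  // bn->width == 2, bn->d[1] == 0x01, bn->d[0] == 0xAAAAAAAAAAAAAAAA
  BN_free(bn);
}
#endif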
BIGNUM *BN_le2bn(const uint8_t *in, size_t len, BIGNUM *ret) {
BIGNUM *bn = NULL;
if (ret == NULL) {
bn = BN_new();
ret = bn;
}
if (ret == NULL) {
return NULL;
}
if (len == 0) {
ret->width = 0;
ret->neg = 0;
return ret;
}
// Reserve enough space in |ret|.
size_t num_words = ((len - 1) / BN_BYTES) + 1;
if (!bn_wexpand(ret, num_words)) {
BN_free(bn);
return NULL;
}
ret->width = num_words;
// Make sure the top bytes will be zeroed.
ret->d[num_words - 1] = 0;
// We only support little-endian platforms, so we can simply memcpy the
// internal representation.
OPENSSL_memcpy(ret->d, in, len);
return ret;
}
size_t BN_bn2bin(const BIGNUM *in, uint8_t *out) {
size_t n, i;
BN_ULONG l;
n = i = BN_num_bytes(in);
while (i--) {
l = in->d[i / BN_BYTES];
*(out++) = (unsigned char)(l >> (8 * (i % BN_BYTES))) & 0xff;
}
return n;
}
static int fits_in_bytes(const uint8_t *bytes, size_t num_bytes, size_t len) {
uint8_t mask = 0;
for (size_t i = len; i < num_bytes; i++) {
mask |= bytes[i];
}
return mask == 0;
}
int BN_bn2le_padded(uint8_t *out, size_t len, const BIGNUM *in) {
const uint8_t *bytes = (const uint8_t *)in->d;
size_t num_bytes = in->width * BN_BYTES;
if (len < num_bytes) {
if (!fits_in_bytes(bytes, num_bytes, len)) {
return 0;
}
num_bytes = len;
}
// We only support little-endian platforms, so we can simply memcpy into the
// internal representation.
OPENSSL_memcpy(out, bytes, num_bytes);
// Pad out the rest of the buffer with zeroes.
OPENSSL_memset(out + num_bytes, 0, len - num_bytes);
return 1;
}
int BN_bn2bin_padded(uint8_t *out, size_t len, const BIGNUM *in) {
const uint8_t *bytes = (const uint8_t *)in->d;
size_t num_bytes = in->width * BN_BYTES;
if (len < num_bytes) {
if (!fits_in_bytes(bytes, num_bytes, len)) {
return 0;
}
num_bytes = len;
}
// We only support little-endian platforms, so we can simply write the buffer
// in reverse.
for (size_t i = 0; i < num_bytes; i++) {
out[len - i - 1] = bytes[i];
}
// Pad out the rest of the buffer with zeroes.
OPENSSL_memset(out, 0, len - num_bytes);
return 1;
}
BN_ULONG BN_get_word(const BIGNUM *bn) {
switch (bn_minimal_width(bn)) {
case 0:
return 0;
case 1:
return bn->d[0];
default:
return BN_MASK2;
}
}
int BN_get_u64(const BIGNUM *bn, uint64_t *out) {
switch (bn_minimal_width(bn)) {
case 0:
*out = 0;
return 1;
case 1:
*out = bn->d[0];
return 1;
#if defined(OPENSSL_32_BIT)
case 2:
*out = (uint64_t) bn->d[0] | (((uint64_t) bn->d[1]) << 32);
return 1;
#endif
default:
return 0;
}
}

View File

@ -0,0 +1,200 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_type_check.h>
#include "internal.h"
#include "../../internal.h"
static int bn_cmp_words_consttime(const BN_ULONG *a, size_t a_len,
const BN_ULONG *b, size_t b_len) {
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
int ret = 0;
// Process the common words in little-endian order.
size_t min = a_len < b_len ? a_len : b_len;
for (size_t i = 0; i < min; i++) {
crypto_word_t eq = constant_time_eq_w(a[i], b[i]);
crypto_word_t lt = constant_time_lt_w(a[i], b[i]);
ret =
constant_time_select_int(eq, ret, constant_time_select_int(lt, -1, 1));
}
// If |a| or |b| has non-zero words beyond |min|, they take precedence.
if (a_len < b_len) {
crypto_word_t mask = 0;
for (size_t i = a_len; i < b_len; i++) {
mask |= b[i];
}
ret = constant_time_select_int(constant_time_is_zero_w(mask), ret, -1);
} else if (b_len < a_len) {
crypto_word_t mask = 0;
for (size_t i = b_len; i < a_len; i++) {
mask |= a[i];
}
ret = constant_time_select_int(constant_time_is_zero_w(mask), ret, 1);
}
return ret;
}
int BN_ucmp(const BIGNUM *a, const BIGNUM *b) {
return bn_cmp_words_consttime(a->d, a->width, b->d, b->width);
}
int BN_cmp(const BIGNUM *a, const BIGNUM *b) {
if ((a == NULL) || (b == NULL)) {
if (a != NULL) {
return -1;
} else if (b != NULL) {
return 1;
} else {
return 0;
}
}
// We do not attempt to process the sign bit in constant time. Negative
// |BIGNUM|s should never occur in crypto, only calculators.
if (a->neg != b->neg) {
if (a->neg) {
return -1;
}
return 1;
}
int ret = BN_ucmp(a, b);
return a->neg ? -ret : ret;
}
int bn_less_than_words(const BN_ULONG *a, const BN_ULONG *b, size_t len) {
return bn_cmp_words_consttime(a, len, b, len) < 0;
}
int BN_abs_is_word(const BIGNUM *bn, BN_ULONG w) {
if (bn->width == 0) {
return w == 0;
}
BN_ULONG mask = bn->d[0] ^ w;
for (int i = 1; i < bn->width; i++) {
mask |= bn->d[i];
}
return mask == 0;
}
int BN_cmp_word(const BIGNUM *a, BN_ULONG b) {
BIGNUM b_bn;
BN_init(&b_bn);
b_bn.d = &b;
b_bn.width = b > 0;
b_bn.dmax = 1;
b_bn.flags = BN_FLG_STATIC_DATA;
return BN_cmp(a, &b_bn);
}
int BN_is_zero(const BIGNUM *bn) {
return bn_fits_in_words(bn, 0);
}
int BN_is_one(const BIGNUM *bn) {
return bn->neg == 0 && BN_abs_is_word(bn, 1);
}
int BN_is_word(const BIGNUM *bn, BN_ULONG w) {
return BN_abs_is_word(bn, w) && (w == 0 || bn->neg == 0);
}
int BN_is_odd(const BIGNUM *bn) {
return bn->width > 0 && (bn->d[0] & 1) == 1;
}
int BN_is_pow2(const BIGNUM *bn) {
int width = bn_minimal_width(bn);
if (width == 0 || bn->neg) {
return 0;
}
for (int i = 0; i < width - 1; i++) {
if (bn->d[i] != 0) {
return 0;
}
}
return 0 == (bn->d[width-1] & (bn->d[width-1] - 1));
}
int BN_equal_consttime(const BIGNUM *a, const BIGNUM *b) {
BN_ULONG mask = 0;
// If |a| or |b| has more words than the other, all those words must be zero.
for (int i = a->width; i < b->width; i++) {
mask |= b->d[i];
}
for (int i = b->width; i < a->width; i++) {
mask |= a->d[i];
}
// Common words must match.
int min = a->width < b->width ? a->width : b->width;
for (int i = 0; i < min; i++) {
mask |= (a->d[i] ^ b->d[i]);
}
// The sign bit must match.
mask |= (a->neg ^ b->neg);
return mask == 0;
}
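// Editorial sketch, not part of the vendored source: unlike BN_cmp, the
// function above never exits early, so its running time does not depend on
// where (or whether) the operands differ. That makes it the right choice
// when the compared values are secret.
#if 0  // illustrative only
static int secrets_match(const BIGNUM *expected, const BIGNUM *computed) {
  // Returns 1 on equality without revealing the first differing word through
  // timing.
  return BN_equal_consttime(expected, computed);
}
#endif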

View File

@ -0,0 +1,236 @@
/* Written by Ulf Moeller for the OpenSSL project. */
/* ====================================================================
* Copyright (c) 1998-2004 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include "../../internal.h"
// The stack frame array is resized on demand; this is its initial allocation size.
#define BN_CTX_START_FRAMES 32
// BN_STACK
// A |BN_STACK| is a stack of |size_t| values.
typedef struct {
// Array of indexes into |ctx->bignums|.
size_t *indexes;
// Number of stack frames, and the size of the allocated array
size_t depth, size;
} BN_STACK;
static void BN_STACK_init(BN_STACK *);
static void BN_STACK_cleanup(BN_STACK *);
static int BN_STACK_push(BN_STACK *, size_t idx);
static size_t BN_STACK_pop(BN_STACK *);
// BN_CTX
DEFINE_STACK_OF(BIGNUM)
// The opaque BN_CTX type
struct bignum_ctx {
// bignums is the stack of |BIGNUM|s managed by this |BN_CTX|.
STACK_OF(BIGNUM) *bignums;
// stack is the stack of |BN_CTX_start| frames. It is the value of |used| at
// the time |BN_CTX_start| was called.
BN_STACK stack;
// used is the number of |BIGNUM|s from |bignums| that have been used.
size_t used;
// error is one if any operation on this |BN_CTX| failed. All subsequent
// operations will fail.
char error;
// defer_error is one if an operation on this |BN_CTX| has failed, but no
// error has been pushed to the queue yet. This is used to defer errors from
// |BN_CTX_start| to |BN_CTX_get|.
char defer_error;
};
BN_CTX *BN_CTX_new(void) {
BN_CTX *ret = OPENSSL_malloc(sizeof(BN_CTX));
if (!ret) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
return NULL;
}
// Initialise the structure
ret->bignums = NULL;
BN_STACK_init(&ret->stack);
ret->used = 0;
ret->error = 0;
ret->defer_error = 0;
return ret;
}
void BN_CTX_free(BN_CTX *ctx) {
if (ctx == NULL) {
return;
}
// All |BN_CTX_start| calls must be matched with |BN_CTX_end|, otherwise the
// function may use more memory than expected, potentially without bound if
// done in a loop. Assert that all |BIGNUM|s have been released.
assert(ctx->used == 0 || ctx->error);
sk_BIGNUM_pop_free(ctx->bignums, BN_free);
BN_STACK_cleanup(&ctx->stack);
OPENSSL_free(ctx);
}
void BN_CTX_start(BN_CTX *ctx) {
if (ctx->error) {
// Once an operation has failed, |ctx->stack| no longer matches the number
// of |BN_CTX_end| calls to come. Do nothing.
return;
}
if (!BN_STACK_push(&ctx->stack, ctx->used)) {
ctx->error = 1;
// |BN_CTX_start| cannot fail, so defer the error to |BN_CTX_get|.
ctx->defer_error = 1;
}
}
BIGNUM *BN_CTX_get(BN_CTX *ctx) {
// Once any operation has failed, they all do.
if (ctx->error) {
if (ctx->defer_error) {
OPENSSL_PUT_ERROR(BN, BN_R_TOO_MANY_TEMPORARY_VARIABLES);
ctx->defer_error = 0;
}
return NULL;
}
if (ctx->bignums == NULL) {
ctx->bignums = sk_BIGNUM_new_null();
if (ctx->bignums == NULL) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
ctx->error = 1;
return NULL;
}
}
if (ctx->used == sk_BIGNUM_num(ctx->bignums)) {
BIGNUM *bn = BN_new();
if (bn == NULL || !sk_BIGNUM_push(ctx->bignums, bn)) {
OPENSSL_PUT_ERROR(BN, BN_R_TOO_MANY_TEMPORARY_VARIABLES);
BN_free(bn);
ctx->error = 1;
return NULL;
}
}
BIGNUM *ret = sk_BIGNUM_value(ctx->bignums, ctx->used);
BN_zero(ret);
// This is bounded by |sk_BIGNUM_num|, so it cannot overflow.
ctx->used++;
return ret;
}
void BN_CTX_end(BN_CTX *ctx) {
if (ctx->error) {
// Once an operation has failed, |ctx->stack| no longer matches the number
// of |BN_CTX_end| calls to come. Do nothing.
return;
}
ctx->used = BN_STACK_pop(&ctx->stack);
}
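// Editorial sketch, not part of the vendored source: the start/get/end
// discipline described in the comments above. Because the error state is
// sticky and later |BN_CTX_get| calls keep returning NULL, checking only the
// last one is sufficient.
#if 0  // illustrative only
static int example_with_ctx(BN_CTX *ctx) {
  int ok = 0;
  BN_CTX_start(ctx);
  BIGNUM *t1 = BN_CTX_get(ctx);
  BIGNUM *t2 = BN_CTX_get(ctx);
  if (t2 == NULL) {  // a NULL here also covers a failed |t1|
    goto done;
  }
  // ... use t1 and t2 as scratch values ...
  ok = 1;
done:
  BN_CTX_end(ctx);  // releases t1/t2 back to the pool; must pair with start
  return ok;
}
#endif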
// BN_STACK
static void BN_STACK_init(BN_STACK *st) {
st->indexes = NULL;
st->depth = st->size = 0;
}
static void BN_STACK_cleanup(BN_STACK *st) {
OPENSSL_free(st->indexes);
}
static int BN_STACK_push(BN_STACK *st, size_t idx) {
if (st->depth == st->size) {
// This function intentionally does not push to the error queue on error.
// Error-reporting is deferred to |BN_CTX_get|.
size_t new_size = st->size != 0 ? st->size * 3 / 2 : BN_CTX_START_FRAMES;
if (new_size <= st->size || new_size > ((size_t)-1) / sizeof(size_t)) {
return 0;
}
size_t *new_indexes =
OPENSSL_realloc(st->indexes, new_size * sizeof(size_t));
if (new_indexes == NULL) {
return 0;
}
st->indexes = new_indexes;
st->size = new_size;
}
st->indexes[st->depth] = idx;
st->depth++;
return 1;
}
static size_t BN_STACK_pop(BN_STACK *st) {
assert(st->depth > 0);
st->depth--;
return st->indexes[st->depth];
}

View File

@ -0,0 +1,886 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include <limits.h>
#include <CBigNumBoringSSL_err.h>
#include "internal.h"
#if !defined(BN_CAN_DIVIDE_ULLONG) && !defined(BN_CAN_USE_INLINE_ASM)
// bn_div_words divides a double-width |h|,|l| by |d| and returns the result,
// which must fit in a |BN_ULONG|.
static BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) {
BN_ULONG dh, dl, q, ret = 0, th, tl, t;
int i, count = 2;
if (d == 0) {
return BN_MASK2;
}
i = BN_num_bits_word(d);
assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i));
i = BN_BITS2 - i;
if (h >= d) {
h -= d;
}
if (i) {
d <<= i;
h = (h << i) | (l >> (BN_BITS2 - i));
l <<= i;
}
dh = (d & BN_MASK2h) >> BN_BITS4;
dl = (d & BN_MASK2l);
for (;;) {
if ((h >> BN_BITS4) == dh) {
q = BN_MASK2l;
} else {
q = h / dh;
}
th = q * dh;
tl = dl * q;
for (;;) {
t = h - th;
if ((t & BN_MASK2h) ||
((tl) <= ((t << BN_BITS4) | ((l & BN_MASK2h) >> BN_BITS4)))) {
break;
}
q--;
th -= dh;
tl -= dl;
}
t = (tl >> BN_BITS4);
tl = (tl << BN_BITS4) & BN_MASK2h;
th += t;
if (l < tl) {
th++;
}
l -= tl;
if (h < th) {
h += d;
q--;
}
h -= th;
if (--count == 0) {
break;
}
ret = q << BN_BITS4;
h = (h << BN_BITS4) | (l >> BN_BITS4);
l = (l & BN_MASK2l) << BN_BITS4;
}
ret |= q;
return ret;
}
#endif // !defined(BN_CAN_DIVIDE_ULLONG) && !defined(BN_CAN_USE_INLINE_ASM)
static inline void bn_div_rem_words(BN_ULONG *quotient_out, BN_ULONG *rem_out,
BN_ULONG n0, BN_ULONG n1, BN_ULONG d0) {
// GCC and Clang generate function calls to |__udivdi3| and |__umoddi3| when
// the |BN_ULLONG|-based C code is used.
//
// GCC bugs:
// * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=14224
// * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43721
// * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54183
// * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58897
// * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65668
//
// Clang bugs:
// * https://llvm.org/bugs/show_bug.cgi?id=6397
// * https://llvm.org/bugs/show_bug.cgi?id=12418
//
// These issues aren't specific to x86 and x86_64, so it might be worthwhile
// to add more assembly language implementations.
#if defined(BN_CAN_USE_INLINE_ASM) && defined(OPENSSL_X86)
__asm__ volatile("divl %4"
: "=a"(*quotient_out), "=d"(*rem_out)
: "a"(n1), "d"(n0), "rm"(d0)
: "cc");
#elif defined(BN_CAN_USE_INLINE_ASM) && defined(OPENSSL_X86_64)
__asm__ volatile("divq %4"
: "=a"(*quotient_out), "=d"(*rem_out)
: "a"(n1), "d"(n0), "rm"(d0)
: "cc");
#else
#if defined(BN_CAN_DIVIDE_ULLONG)
BN_ULLONG n = (((BN_ULLONG)n0) << BN_BITS2) | n1;
*quotient_out = (BN_ULONG)(n / d0);
#else
*quotient_out = bn_div_words(n0, n1, d0);
#endif
*rem_out = n1 - (*quotient_out * d0);
#endif
}
// BN_div computes "quotient := numerator / divisor", rounding towards zero,
// and sets up |rem| such that "quotient * divisor + rem = numerator" holds.
//
// Thus:
//
// quotient->neg == numerator->neg ^ divisor->neg
// (unless the result is zero)
// rem->neg == numerator->neg
// (unless the remainder is zero)
//
// If |quotient| or |rem| is NULL, the respective value is not returned.
//
// This was specifically designed to contain fewer branches that may leak
// sensitive information; see "New Branch Prediction Vulnerabilities in OpenSSL
// and Necessary Software Countermeasures" by Onur Acıçmez, Shay Gueron, and
// Jean-Pierre Seifert.
int BN_div(BIGNUM *quotient, BIGNUM *rem, const BIGNUM *numerator,
const BIGNUM *divisor, BN_CTX *ctx) {
int norm_shift, loop;
BIGNUM wnum;
BN_ULONG *resp, *wnump;
BN_ULONG d0, d1;
int num_n, div_n;
// This function relies on the historical minimal-width |BIGNUM| invariant.
// It is already not constant-time (constant-time reductions should use
// Montgomery logic), so we shrink all inputs and intermediate values to
// retain the previous behavior.
// Invalid zero-padding would have particularly bad consequences.
int numerator_width = bn_minimal_width(numerator);
int divisor_width = bn_minimal_width(divisor);
if ((numerator_width > 0 && numerator->d[numerator_width - 1] == 0) ||
(divisor_width > 0 && divisor->d[divisor_width - 1] == 0)) {
OPENSSL_PUT_ERROR(BN, BN_R_NOT_INITIALIZED);
return 0;
}
if (BN_is_zero(divisor)) {
OPENSSL_PUT_ERROR(BN, BN_R_DIV_BY_ZERO);
return 0;
}
BN_CTX_start(ctx);
BIGNUM *tmp = BN_CTX_get(ctx);
BIGNUM *snum = BN_CTX_get(ctx);
BIGNUM *sdiv = BN_CTX_get(ctx);
BIGNUM *res = NULL;
if (quotient == NULL) {
res = BN_CTX_get(ctx);
} else {
res = quotient;
}
if (sdiv == NULL || res == NULL) {
goto err;
}
// First we normalise the numbers
norm_shift = BN_BITS2 - (BN_num_bits(divisor) % BN_BITS2);
if (!BN_lshift(sdiv, divisor, norm_shift)) {
goto err;
}
bn_set_minimal_width(sdiv);
sdiv->neg = 0;
norm_shift += BN_BITS2;
if (!BN_lshift(snum, numerator, norm_shift)) {
goto err;
}
bn_set_minimal_width(snum);
snum->neg = 0;
// Since we don't want to have special-case logic for the case where snum is
// larger than sdiv, we pad snum with enough zeroes without changing its
// value.
if (snum->width <= sdiv->width + 1) {
if (!bn_wexpand(snum, sdiv->width + 2)) {
goto err;
}
for (int i = snum->width; i < sdiv->width + 2; i++) {
snum->d[i] = 0;
}
snum->width = sdiv->width + 2;
} else {
if (!bn_wexpand(snum, snum->width + 1)) {
goto err;
}
snum->d[snum->width] = 0;
snum->width++;
}
div_n = sdiv->width;
num_n = snum->width;
loop = num_n - div_n;
// Let's set up a 'window' into snum
// This is the part that corresponds to the current
// 'area' being divided
wnum.neg = 0;
wnum.d = &(snum->d[loop]);
wnum.width = div_n;
// only needed when BN_ucmp messes up the values between width and max
wnum.dmax = snum->dmax - loop; // so we don't step out of bounds
// Get the top 2 words of sdiv
// div_n=sdiv->width;
d0 = sdiv->d[div_n - 1];
d1 = (div_n == 1) ? 0 : sdiv->d[div_n - 2];
// pointer to the 'top' of snum
wnump = &(snum->d[num_n - 1]);
// Set up 'res'
res->neg = (numerator->neg ^ divisor->neg);
if (!bn_wexpand(res, loop + 1)) {
goto err;
}
res->width = loop - 1;
resp = &(res->d[loop - 1]);
// space for temp
if (!bn_wexpand(tmp, div_n + 1)) {
goto err;
}
// if res->width == 0 then clear the neg value otherwise decrease
// the resp pointer
if (res->width == 0) {
res->neg = 0;
} else {
resp--;
}
for (int i = 0; i < loop - 1; i++, wnump--, resp--) {
BN_ULONG q, l0;
// the first part of the loop uses the top two words of snum and sdiv to
// calculate a BN_ULONG q such that | wnum - sdiv * q | < sdiv
BN_ULONG n0, n1, rm = 0;
n0 = wnump[0];
n1 = wnump[-1];
if (n0 == d0) {
q = BN_MASK2;
} else {
// n0 < d0
bn_div_rem_words(&q, &rm, n0, n1, d0);
#ifdef BN_ULLONG
BN_ULLONG t2 = (BN_ULLONG)d1 * q;
for (;;) {
if (t2 <= ((((BN_ULLONG)rm) << BN_BITS2) | wnump[-2])) {
break;
}
q--;
rm += d0;
if (rm < d0) {
break; // don't let rm overflow
}
t2 -= d1;
}
#else // !BN_ULLONG
BN_ULONG t2l, t2h;
BN_UMULT_LOHI(t2l, t2h, d1, q);
for (;;) {
if (t2h < rm ||
(t2h == rm && t2l <= wnump[-2])) {
break;
}
q--;
rm += d0;
if (rm < d0) {
break; // don't let rm overflow
}
if (t2l < d1) {
t2h--;
}
t2l -= d1;
}
#endif // !BN_ULLONG
}
l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q);
tmp->d[div_n] = l0;
wnum.d--;
// ignore the top values of the bignums; just subtract the two
// BN_ULONG arrays with bn_sub_words
if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n + 1)) {
// Note: As we have considered only the leading
// two BN_ULONGs in the calculation of q, sdiv * q
// might be greater than wnum (but then (q-1) * sdiv
// is less than or equal to wnum)
q--;
if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n)) {
// we can't have an overflow here (assuming
// that q != 0, but if q == 0 then tmp is
// zero anyway)
(*wnump)++;
}
}
// store part of the result
*resp = q;
}
bn_set_minimal_width(snum);
if (rem != NULL) {
// Keep a copy of the neg flag in numerator because if |rem| == |numerator|
// |BN_rshift| will overwrite it.
int neg = numerator->neg;
if (!BN_rshift(rem, snum, norm_shift)) {
goto err;
}
if (!BN_is_zero(rem)) {
rem->neg = neg;
}
}
bn_set_minimal_width(res);
BN_CTX_end(ctx);
return 1;
err:
BN_CTX_end(ctx);
return 0;
}
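// Editorial sketch, not part of the vendored source: the quotient/remainder
// contract documented above, on small values. 7 / -2 rounds towards zero, so
// the quotient is -3 and the remainder 1 (taking the numerator's sign), and
// quotient * divisor + remainder == numerator holds. Error checks omitted.
#if 0  // illustrative only
static void example_div(BN_CTX *ctx) {
  BIGNUM *num = BN_new(), *div = BN_new(), *q = BN_new(), *r = BN_new();
  BN_set_word(num, 7);
  BN_set_word(div, 2);
  BN_set_negative(div, 1);      // divisor = -2
  BN_div(q, r, num, div, ctx);  // q = -3, r = 1, and (-3)*(-2) + 1 == 7
  BN_free(num); BN_free(div); BN_free(q); BN_free(r);
}
#endif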
int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx) {
if (!(BN_mod(r, m, d, ctx))) {
return 0;
}
if (!r->neg) {
return 1;
}
// now -|d| < r < 0, so we have to set r := r + |d|.
return (d->neg ? BN_sub : BN_add)(r, r, d);
}
BN_ULONG bn_reduce_once(BN_ULONG *r, const BN_ULONG *a, BN_ULONG carry,
const BN_ULONG *m, size_t num) {
assert(r != a);
// |r| = |a| - |m|. |bn_sub_words| performs the bulk of the subtraction, and
// then we apply the borrow to |carry|.
carry -= bn_sub_words(r, a, m, num);
// We know 0 <= |a| < 2*|m|, so -|m| <= |r| < |m|.
//
// If 0 <= |r| < |m|, |r| fits in |num| words and |carry| is zero. We then
// wish to select |r| as the answer. Otherwise -m <= r < 0 and we wish to
// return |r| + |m|, or |a|. |carry| must then be -1 or all ones. In both
// cases, |carry| is a suitable input to |bn_select_words|.
//
// Although |carry| may be one if it was one on input and |bn_sub_words|
// returns zero, this would give |r| > |m|, violating our input assumptions.
assert(carry == 0 || carry == (BN_ULONG)-1);
bn_select_words(r, carry, a /* r < 0 */, r /* r >= 0 */, num);
return carry;
}
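// Editorial worked example, not part of the vendored source, using one-word
// values and carry = 0 on entry. For a = 9, m = 7: the subtraction 9 - 7 = 2
// produces no borrow, carry stays 0, and the reduced value 2 is selected.
// For a = 3, m = 7: 3 - 7 borrows, carry becomes all ones, and
// bn_select_words puts the original a = 3 back into |r|.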
BN_ULONG bn_reduce_once_in_place(BN_ULONG *r, BN_ULONG carry, const BN_ULONG *m,
BN_ULONG *tmp, size_t num) {
// See |bn_reduce_once| for why this logic works.
carry -= bn_sub_words(tmp, r, m, num);
assert(carry == 0 || carry == (BN_ULONG)-1);
bn_select_words(r, carry, r /* tmp < 0 */, tmp /* tmp >= 0 */, num);
return carry;
}
void bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
const BN_ULONG *m, BN_ULONG *tmp, size_t num) {
// r = a - b
BN_ULONG borrow = bn_sub_words(r, a, b, num);
// tmp = a - b + m
bn_add_words(tmp, r, m, num);
bn_select_words(r, 0 - borrow, tmp /* r < 0 */, r /* r >= 0 */, num);
}
void bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
const BN_ULONG *m, BN_ULONG *tmp, size_t num) {
BN_ULONG carry = bn_add_words(r, a, b, num);
bn_reduce_once_in_place(r, carry, m, tmp, num);
}
int bn_div_consttime(BIGNUM *quotient, BIGNUM *remainder,
const BIGNUM *numerator, const BIGNUM *divisor,
BN_CTX *ctx) {
if (BN_is_negative(numerator) || BN_is_negative(divisor)) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
if (BN_is_zero(divisor)) {
OPENSSL_PUT_ERROR(BN, BN_R_DIV_BY_ZERO);
return 0;
}
// This function implements long division in binary. It is not very efficient,
// but it is simple, easy to make constant-time, and performant enough for RSA
// key generation.
int ret = 0;
BN_CTX_start(ctx);
BIGNUM *q = quotient, *r = remainder;
if (quotient == NULL || quotient == numerator || quotient == divisor) {
q = BN_CTX_get(ctx);
}
if (remainder == NULL || remainder == numerator || remainder == divisor) {
r = BN_CTX_get(ctx);
}
BIGNUM *tmp = BN_CTX_get(ctx);
if (q == NULL || r == NULL || tmp == NULL ||
!bn_wexpand(q, numerator->width) ||
!bn_wexpand(r, divisor->width) ||
!bn_wexpand(tmp, divisor->width)) {
goto err;
}
OPENSSL_memset(q->d, 0, numerator->width * sizeof(BN_ULONG));
q->width = numerator->width;
q->neg = 0;
OPENSSL_memset(r->d, 0, divisor->width * sizeof(BN_ULONG));
r->width = divisor->width;
r->neg = 0;
// Incorporate |numerator| into |r|, one bit at a time, reducing after each
// step. At the start of each loop iteration, |r| < |divisor|.
for (int i = numerator->width - 1; i >= 0; i--) {
for (int bit = BN_BITS2 - 1; bit >= 0; bit--) {
// Incorporate the next bit of the numerator, by computing
// r = 2*r or 2*r + 1. Note the result fits in one more word. We store the
// extra word in |carry|.
BN_ULONG carry = bn_add_words(r->d, r->d, r->d, divisor->width);
r->d[0] |= (numerator->d[i] >> bit) & 1;
// |r| was previously fully-reduced, so we know:
// 2*0 <= r <= 2*(divisor-1) + 1
// 0 <= r <= 2*divisor - 1 < 2*divisor.
// Thus |r| satisfies the preconditions for |bn_reduce_once_in_place|.
BN_ULONG subtracted = bn_reduce_once_in_place(r->d, carry, divisor->d,
tmp->d, divisor->width);
// The corresponding bit of the quotient is set iff we needed to subtract.
q->d[i] |= (~subtracted & 1) << bit;
}
}
if ((quotient != NULL && !BN_copy(quotient, q)) ||
(remainder != NULL && !BN_copy(remainder, r))) {
goto err;
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
static BIGNUM *bn_scratch_space_from_ctx(size_t width, BN_CTX *ctx) {
BIGNUM *ret = BN_CTX_get(ctx);
if (ret == NULL ||
!bn_wexpand(ret, width)) {
return NULL;
}
ret->neg = 0;
ret->width = width;
return ret;
}
// bn_resized_from_ctx returns |bn| with width at least |width| or NULL on
// error. This is so it may be used with low-level "words" functions. If
// necessary, it allocates a new |BIGNUM| with a lifetime of the current scope
// in |ctx|, so the caller does not need to explicitly free it. |bn| must fit in
// |width| words.
static const BIGNUM *bn_resized_from_ctx(const BIGNUM *bn, size_t width,
BN_CTX *ctx) {
if ((size_t)bn->width >= width) {
// Any excess words must be zero.
assert(bn_fits_in_words(bn, width));
return bn;
}
BIGNUM *ret = bn_scratch_space_from_ctx(width, ctx);
if (ret == NULL ||
!BN_copy(ret, bn) ||
!bn_resize_words(ret, width)) {
return NULL;
}
return ret;
}
int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
BN_CTX *ctx) {
if (!BN_add(r, a, b)) {
return 0;
}
return BN_nnmod(r, r, m, ctx);
}
int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BIGNUM *m) {
BN_CTX *ctx = BN_CTX_new();
int ok = ctx != NULL &&
bn_mod_add_consttime(r, a, b, m, ctx);
BN_CTX_free(ctx);
return ok;
}
int bn_mod_add_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BIGNUM *m, BN_CTX *ctx) {
BN_CTX_start(ctx);
a = bn_resized_from_ctx(a, m->width, ctx);
b = bn_resized_from_ctx(b, m->width, ctx);
BIGNUM *tmp = bn_scratch_space_from_ctx(m->width, ctx);
int ok = a != NULL && b != NULL && tmp != NULL &&
bn_wexpand(r, m->width);
if (ok) {
bn_mod_add_words(r->d, a->d, b->d, m->d, tmp->d, m->width);
r->width = m->width;
r->neg = 0;
}
BN_CTX_end(ctx);
return ok;
}
int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
BN_CTX *ctx) {
if (!BN_sub(r, a, b)) {
return 0;
}
return BN_nnmod(r, r, m, ctx);
}
int bn_mod_sub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BIGNUM *m, BN_CTX *ctx) {
BN_CTX_start(ctx);
a = bn_resized_from_ctx(a, m->width, ctx);
b = bn_resized_from_ctx(b, m->width, ctx);
BIGNUM *tmp = bn_scratch_space_from_ctx(m->width, ctx);
int ok = a != NULL && b != NULL && tmp != NULL &&
bn_wexpand(r, m->width);
if (ok) {
bn_mod_sub_words(r->d, a->d, b->d, m->d, tmp->d, m->width);
r->width = m->width;
r->neg = 0;
}
BN_CTX_end(ctx);
return ok;
}
int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BIGNUM *m) {
BN_CTX *ctx = BN_CTX_new();
int ok = ctx != NULL &&
bn_mod_sub_consttime(r, a, b, m, ctx);
BN_CTX_free(ctx);
return ok;
}
int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
BN_CTX *ctx) {
BIGNUM *t;
int ret = 0;
BN_CTX_start(ctx);
t = BN_CTX_get(ctx);
if (t == NULL) {
goto err;
}
if (a == b) {
if (!BN_sqr(t, a, ctx)) {
goto err;
}
} else {
if (!BN_mul(t, a, b, ctx)) {
goto err;
}
}
if (!BN_nnmod(r, t, m, ctx)) {
goto err;
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) {
if (!BN_sqr(r, a, ctx)) {
return 0;
}
// r->neg == 0, thus we don't need BN_nnmod
return BN_mod(r, r, m, ctx);
}
int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m,
BN_CTX *ctx) {
BIGNUM *abs_m = NULL;
int ret;
if (!BN_nnmod(r, a, m, ctx)) {
return 0;
}
if (m->neg) {
abs_m = BN_dup(m);
if (abs_m == NULL) {
return 0;
}
abs_m->neg = 0;
}
ret = bn_mod_lshift_consttime(r, r, n, (abs_m ? abs_m : m), ctx);
BN_free(abs_m);
return ret;
}
int bn_mod_lshift_consttime(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m,
BN_CTX *ctx) {
if (!BN_copy(r, a)) {
return 0;
}
for (int i = 0; i < n; i++) {
if (!bn_mod_lshift1_consttime(r, r, m, ctx)) {
return 0;
}
}
return 1;
}
int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m) {
BN_CTX *ctx = BN_CTX_new();
int ok = ctx != NULL &&
bn_mod_lshift_consttime(r, a, n, m, ctx);
BN_CTX_free(ctx);
return ok;
}
int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) {
if (!BN_lshift1(r, a)) {
return 0;
}
return BN_nnmod(r, r, m, ctx);
}
int bn_mod_lshift1_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *m,
BN_CTX *ctx) {
return bn_mod_add_consttime(r, a, a, m, ctx);
}
int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m) {
BN_CTX *ctx = BN_CTX_new();
int ok = ctx != NULL &&
bn_mod_lshift1_consttime(r, a, m, ctx);
BN_CTX_free(ctx);
return ok;
}
BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w) {
BN_ULONG ret = 0;
int i, j;
if (!w) {
// actually this is an error (division by zero)
return (BN_ULONG) - 1;
}
if (a->width == 0) {
return 0;
}
// normalize input for |bn_div_rem_words|.
j = BN_BITS2 - BN_num_bits_word(w);
w <<= j;
if (!BN_lshift(a, a, j)) {
return (BN_ULONG) - 1;
}
for (i = a->width - 1; i >= 0; i--) {
BN_ULONG l = a->d[i];
BN_ULONG d;
BN_ULONG unused_rem;
bn_div_rem_words(&d, &unused_rem, ret, l, w);
ret = l - (d * w);
a->d[i] = d;
}
bn_set_minimal_width(a);
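// Undo the normalization: (2^j * a) / (2^j * w) has the same quotient as
// a / w, while the remainder is scaled by 2^j, so shift it back down.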
ret >>= j;
return ret;
}
BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w) {
#ifndef BN_CAN_DIVIDE_ULLONG
BN_ULONG ret = 0;
#else
BN_ULLONG ret = 0;
#endif
int i;
if (w == 0) {
return (BN_ULONG) -1;
}
#ifndef BN_CAN_DIVIDE_ULLONG
// If |w| is too long and we don't have |BN_ULLONG| division then we need to
// fall back to using |BN_div_word|.
if (w > ((BN_ULONG)1 << BN_BITS4)) {
BIGNUM *tmp = BN_dup(a);
if (tmp == NULL) {
return (BN_ULONG)-1;
}
ret = BN_div_word(tmp, w);
BN_free(tmp);
return ret;
}
#endif
for (i = a->width - 1; i >= 0; i--) {
#ifndef BN_CAN_DIVIDE_ULLONG
ret = ((ret << BN_BITS4) | ((a->d[i] >> BN_BITS4) & BN_MASK2l)) % w;
ret = ((ret << BN_BITS4) | (a->d[i] & BN_MASK2l)) % w;
#else
ret = (BN_ULLONG)(((ret << (BN_ULLONG)BN_BITS2) | a->d[i]) % (BN_ULLONG)w);
#endif
}
return (BN_ULONG)ret;
}
int BN_mod_pow2(BIGNUM *r, const BIGNUM *a, size_t e) {
if (e == 0 || a->width == 0) {
BN_zero(r);
return 1;
}
size_t num_words = 1 + ((e - 1) / BN_BITS2);
// If |a| definitely has less than |e| bits, just BN_copy.
if ((size_t) a->width < num_words) {
return BN_copy(r, a) != NULL;
}
// Otherwise, first make sure we have enough space in |r|.
// Note that this will fail if num_words > INT_MAX.
if (!bn_wexpand(r, num_words)) {
return 0;
}
// Copy the content of |a| into |r|.
OPENSSL_memcpy(r->d, a->d, num_words * sizeof(BN_ULONG));
// If |e| isn't word-aligned, we have to mask off some of our bits.
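// For example, with |e| = 5 only the low five bits survive, so 109 = 0b1101101
// becomes 0b01101 = 13, which is 109 mod 32.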
size_t top_word_exponent = e % (sizeof(BN_ULONG) * 8);
if (top_word_exponent != 0) {
r->d[num_words - 1] &= (((BN_ULONG) 1) << top_word_exponent) - 1;
}
// Fill in the remaining fields of |r|.
r->neg = a->neg;
r->width = (int) num_words;
bn_set_minimal_width(r);
return 1;
}
int BN_nnmod_pow2(BIGNUM *r, const BIGNUM *a, size_t e) {
if (!BN_mod_pow2(r, a, e)) {
return 0;
}
// If the returned value was non-negative, we're done.
if (BN_is_zero(r) || !r->neg) {
return 1;
}
size_t num_words = 1 + (e - 1) / BN_BITS2;
// Expand |r| to the size of our modulus.
if (!bn_wexpand(r, num_words)) {
return 0;
}
// Clear the upper words of |r|.
OPENSSL_memset(&r->d[r->width], 0, (num_words - r->width) * BN_BYTES);
// Set parameters of |r|.
r->neg = 0;
r->width = (int) num_words;
// Now, invert every word. The idea here is that we want to compute 2^e-|x|,
// which is actually equivalent to the twos-complement representation of |x|
// in |e| bits, which is -x = ~x + 1.
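// For example, with |e| = 4 and |x| = 3, inverting 0b0011 gives 0b1100 = 12,
// and the final addition gives 13 = 2^4 - 3, the non-negative value of
// -3 mod 16.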
for (int i = 0; i < r->width; i++) {
r->d[i] = ~r->d[i];
}
// If our exponent doesn't span the top word, we have to mask the rest.
size_t top_word_exponent = e % BN_BITS2;
if (top_word_exponent != 0) {
r->d[r->width - 1] &= (((BN_ULONG) 1) << top_word_exponent) - 1;
}
// Keep the minimal-width invariant for |BIGNUM|.
bn_set_minimal_width(r);
// Finally, add one, for the reason described above.
return BN_add(r, r, BN_value_one());
}

View File

@@ -0,0 +1,87 @@
/* Copyright (c) 2018, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include "internal.h"
// The following functions use a Barrett reduction variant to avoid leaking the
// numerator. See http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
//
// We use 32-bit numerator and 16-bit divisor for simplicity. This allows
// computing |m| and |q| without architecture-specific code.
// mod_u16 returns |n| mod |d|. |p| and |m| are the "magic numbers" for |d| (see
// reference). For proof of correctness in Coq, see
// https://github.com/davidben/fiat-crypto/blob/barrett/src/Arithmetic/BarrettReduction/RidiculousFish.v
// Note the Coq version of |mod_u16| additionally includes the computation of
// |p| and |m| from |bn_mod_u16_consttime| below.
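//
// As a worked example with the values |bn_mod_u16_consttime| below would
// compute for d = 7: p = 3 and m = 613566757 (the magic number truncated to 32
// bits). For n = 100, q = (m*n) >> 32 = 14, t = ((100 - 14) >> 1) + 14 = 57,
// 57 >> (p - 1) = 14, and 100 - 7*14 = 2 = 100 mod 7.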
static uint16_t mod_u16(uint32_t n, uint16_t d, uint32_t p, uint32_t m) {
// Compute floor(n/d) per steps 3 through 5.
uint32_t q = ((uint64_t)m * n) >> 32;
// Note there is a typo in the reference. We right-shift by one, not two.
uint32_t t = ((n - q) >> 1) + q;
t = t >> (p - 1);
// Multiply and subtract to get the remainder.
n -= d * t;
assert(n < d);
return n;
}
// shift_and_add_mod_u16 returns |r| * 2^32 + |a| mod |d|. |p| and |m| are the
// "magic numbers" for |d| (see reference).
static uint16_t shift_and_add_mod_u16(uint16_t r, uint32_t a, uint16_t d,
uint32_t p, uint32_t m) {
// Incorporate |a| in two 16-bit chunks.
uint32_t t = r;
t <<= 16;
t |= a >> 16;
t = mod_u16(t, d, p, m);
t <<= 16;
t |= a & 0xffff;
t = mod_u16(t, d, p, m);
return t;
}
uint16_t bn_mod_u16_consttime(const BIGNUM *bn, uint16_t d) {
if (d <= 1) {
return 0;
}
// Compute the "magic numbers" for |d|. See steps 1 and 2.
// This computes p = ceil(log_2(d)).
uint32_t p = BN_num_bits_word(d - 1);
// This operation is not constant-time, but |p| and |d| are public values.
// Note that |p| is at most 16, so the computation fits in |uint64_t|.
assert(p <= 16);
uint32_t m = ((UINT64_C(1) << (32 + p)) + d - 1) / d;
uint16_t ret = 0;
for (int i = bn->width - 1; i >= 0; i--) {
#if BN_BITS2 == 32
ret = shift_and_add_mod_u16(ret, bn->d[i], d, p, m);
#elif BN_BITS2 == 64
ret = shift_and_add_mod_u16(ret, bn->d[i] >> 32, d, p, m);
ret = shift_and_add_mod_u16(ret, bn->d[i] & 0xffffffff, d, p, m);
#else
#error "Unknown BN_ULONG size"
#endif
}
return ret;
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,378 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/
/* ====================================================================
* Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_bn.h>
#include <CBigNumBoringSSL_err.h>
#include "internal.h"
int BN_mod_inverse_odd(BIGNUM *out, int *out_no_inverse, const BIGNUM *a,
const BIGNUM *n, BN_CTX *ctx) {
*out_no_inverse = 0;
if (!BN_is_odd(n)) {
OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
return 0;
}
if (BN_is_negative(a) || BN_cmp(a, n) >= 0) {
OPENSSL_PUT_ERROR(BN, BN_R_INPUT_NOT_REDUCED);
return 0;
}
BIGNUM *A, *B, *X, *Y;
int ret = 0;
int sign;
BN_CTX_start(ctx);
A = BN_CTX_get(ctx);
B = BN_CTX_get(ctx);
X = BN_CTX_get(ctx);
Y = BN_CTX_get(ctx);
if (Y == NULL) {
goto err;
}
BIGNUM *R = out;
BN_zero(Y);
if (!BN_one(X) || BN_copy(B, a) == NULL || BN_copy(A, n) == NULL) {
goto err;
}
A->neg = 0;
sign = -1;
// From B = a mod |n|, A = |n| it follows that
//
// 0 <= B < A,
// -sign*X*a == B (mod |n|),
// sign*Y*a == A (mod |n|).
// Binary inversion algorithm; requires odd modulus. This is faster than the
// general algorithm if the modulus is sufficiently small (about 400 .. 500
// bits on 32-bit systems, but much more on 64-bit systems)
int shift;
while (!BN_is_zero(B)) {
// 0 < B < |n|,
// 0 < A <= |n|,
// (1) -sign*X*a == B (mod |n|),
// (2) sign*Y*a == A (mod |n|)
// Now divide B by the maximum possible power of two in the integers,
// and divide X by the same value mod |n|.
// When we're done, (1) still holds.
shift = 0;
while (!BN_is_bit_set(B, shift)) {
// note that 0 < B
shift++;
if (BN_is_odd(X)) {
if (!BN_uadd(X, X, n)) {
goto err;
}
}
// now X is even, so we can easily divide it by two
if (!BN_rshift1(X, X)) {
goto err;
}
}
if (shift > 0) {
if (!BN_rshift(B, B, shift)) {
goto err;
}
}
// Same for A and Y. Afterwards, (2) still holds.
shift = 0;
while (!BN_is_bit_set(A, shift)) {
// note that 0 < A
shift++;
if (BN_is_odd(Y)) {
if (!BN_uadd(Y, Y, n)) {
goto err;
}
}
// now Y is even
if (!BN_rshift1(Y, Y)) {
goto err;
}
}
if (shift > 0) {
if (!BN_rshift(A, A, shift)) {
goto err;
}
}
// We still have (1) and (2).
// Both A and B are odd.
// The following computations ensure that
//
// 0 <= B < |n|,
// 0 < A < |n|,
// (1) -sign*X*a == B (mod |n|),
// (2) sign*Y*a == A (mod |n|),
//
// and that either A or B is even in the next iteration.
if (BN_ucmp(B, A) >= 0) {
// -sign*(X + Y)*a == B - A (mod |n|)
if (!BN_uadd(X, X, Y)) {
goto err;
}
// NB: we could use BN_mod_add_quick(X, X, Y, n), but that
// actually makes the algorithm slower
if (!BN_usub(B, B, A)) {
goto err;
}
} else {
// sign*(X + Y)*a == A - B (mod |n|)
if (!BN_uadd(Y, Y, X)) {
goto err;
}
// as above, BN_mod_add_quick(Y, Y, X, n) would slow things down
if (!BN_usub(A, A, B)) {
goto err;
}
}
}
if (!BN_is_one(A)) {
*out_no_inverse = 1;
OPENSSL_PUT_ERROR(BN, BN_R_NO_INVERSE);
goto err;
}
// The while loop (Euclid's algorithm) ends when
// A == gcd(a,n);
// we have
// sign*Y*a == A (mod |n|),
// where Y is non-negative.
if (sign < 0) {
if (!BN_sub(Y, n, Y)) {
goto err;
}
}
// Now Y*a == A (mod |n|).
// Y*a == 1 (mod |n|)
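// For example, a = 3 and |n| = 7 results in R = 5, and indeed 3*5 = 15 == 1
// (mod 7).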
if (!Y->neg && BN_ucmp(Y, n) < 0) {
if (!BN_copy(R, Y)) {
goto err;
}
} else {
if (!BN_nnmod(R, Y, n, ctx)) {
goto err;
}
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
BIGNUM *BN_mod_inverse(BIGNUM *out, const BIGNUM *a, const BIGNUM *n,
BN_CTX *ctx) {
BIGNUM *new_out = NULL;
if (out == NULL) {
new_out = BN_new();
if (new_out == NULL) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
return NULL;
}
out = new_out;
}
int ok = 0;
BIGNUM *a_reduced = NULL;
if (a->neg || BN_ucmp(a, n) >= 0) {
a_reduced = BN_dup(a);
if (a_reduced == NULL) {
goto err;
}
if (!BN_nnmod(a_reduced, a_reduced, n, ctx)) {
goto err;
}
a = a_reduced;
}
int no_inverse;
if (!BN_is_odd(n)) {
if (!bn_mod_inverse_consttime(out, &no_inverse, a, n, ctx)) {
goto err;
}
} else if (!BN_mod_inverse_odd(out, &no_inverse, a, n, ctx)) {
goto err;
}
ok = 1;
err:
if (!ok) {
BN_free(new_out);
out = NULL;
}
BN_free(a_reduced);
return out;
}
int BN_mod_inverse_blinded(BIGNUM *out, int *out_no_inverse, const BIGNUM *a,
const BN_MONT_CTX *mont, BN_CTX *ctx) {
*out_no_inverse = 0;
if (BN_is_negative(a) || BN_cmp(a, &mont->N) >= 0) {
OPENSSL_PUT_ERROR(BN, BN_R_INPUT_NOT_REDUCED);
return 0;
}
int ret = 0;
BIGNUM blinding_factor;
BN_init(&blinding_factor);
if (!BN_rand_range_ex(&blinding_factor, 1, &mont->N) ||
!BN_mod_mul_montgomery(out, &blinding_factor, a, mont, ctx) ||
!BN_mod_inverse_odd(out, out_no_inverse, out, &mont->N, ctx) ||
!BN_mod_mul_montgomery(out, &blinding_factor, out, mont, ctx)) {
OPENSSL_PUT_ERROR(BN, ERR_R_BN_LIB);
goto err;
}
ret = 1;
err:
BN_free(&blinding_factor);
return ret;
}
int bn_mod_inverse_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
BN_CTX *ctx, const BN_MONT_CTX *mont_p) {
BN_CTX_start(ctx);
BIGNUM *p_minus_2 = BN_CTX_get(ctx);
int ok = p_minus_2 != NULL &&
BN_copy(p_minus_2, p) &&
BN_sub_word(p_minus_2, 2) &&
BN_mod_exp_mont(out, a, p_minus_2, p, ctx, mont_p);
BN_CTX_end(ctx);
return ok;
}
int bn_mod_inverse_secret_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
BN_CTX *ctx, const BN_MONT_CTX *mont_p) {
BN_CTX_start(ctx);
BIGNUM *p_minus_2 = BN_CTX_get(ctx);
int ok = p_minus_2 != NULL &&
BN_copy(p_minus_2, p) &&
BN_sub_word(p_minus_2, 2) &&
BN_mod_exp_mont_consttime(out, a, p_minus_2, p, ctx, mont_p);
BN_CTX_end(ctx);
return ok;
}

View File

@@ -0,0 +1,325 @@
/* Copyright (c) 2018, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include <CBigNumBoringSSL_err.h>
#include "internal.h"
static BN_ULONG word_is_odd_mask(BN_ULONG a) { return (BN_ULONG)0 - (a & 1); }
static void maybe_rshift1_words(BN_ULONG *a, BN_ULONG mask, BN_ULONG *tmp,
size_t num) {
bn_rshift1_words(tmp, a, num);
bn_select_words(a, mask, tmp, a, num);
}
static void maybe_rshift1_words_carry(BN_ULONG *a, BN_ULONG carry,
BN_ULONG mask, BN_ULONG *tmp,
size_t num) {
maybe_rshift1_words(a, mask, tmp, num);
if (num != 0) {
carry &= mask;
a[num - 1] |= carry << (BN_BITS2-1);
}
}
static BN_ULONG maybe_add_words(BN_ULONG *a, BN_ULONG mask, const BN_ULONG *b,
BN_ULONG *tmp, size_t num) {
BN_ULONG carry = bn_add_words(tmp, a, b, num);
bn_select_words(a, mask, tmp, a, num);
return carry & mask;
}
static int bn_gcd_consttime(BIGNUM *r, unsigned *out_shift, const BIGNUM *x,
const BIGNUM *y, BN_CTX *ctx) {
size_t width = x->width > y->width ? x->width : y->width;
if (width == 0) {
*out_shift = 0;
BN_zero(r);
return 1;
}
// This is a constant-time implementation of Stein's algorithm (binary GCD).
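// For illustration, x = 12 and y = 18 share one factor of two, recorded in
// |shift|, while the pair passes through (6, 9), (3, 9), (3, 6), (3, 3) and
// (0, 3); the function then sets |r| = 3 and |*out_shift| = 1, and |BN_gcd|
// left-shifts to obtain gcd(12, 18) = 6.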
int ret = 0;
BN_CTX_start(ctx);
BIGNUM *u = BN_CTX_get(ctx);
BIGNUM *v = BN_CTX_get(ctx);
BIGNUM *tmp = BN_CTX_get(ctx);
if (u == NULL || v == NULL || tmp == NULL ||
!BN_copy(u, x) ||
!BN_copy(v, y) ||
!bn_resize_words(u, width) ||
!bn_resize_words(v, width) ||
!bn_resize_words(tmp, width)) {
goto err;
}
// Each loop iteration halves at least one of |u| and |v|. Thus we need at
// most the combined bit width of inputs for at least one value to be zero.
unsigned x_bits = x->width * BN_BITS2, y_bits = y->width * BN_BITS2;
unsigned num_iters = x_bits + y_bits;
if (num_iters < x_bits) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
goto err;
}
unsigned shift = 0;
for (unsigned i = 0; i < num_iters; i++) {
BN_ULONG both_odd = word_is_odd_mask(u->d[0]) & word_is_odd_mask(v->d[0]);
// If both |u| and |v| are odd, subtract the smaller from the larger.
BN_ULONG u_less_than_v =
(BN_ULONG)0 - bn_sub_words(tmp->d, u->d, v->d, width);
bn_select_words(u->d, both_odd & ~u_less_than_v, tmp->d, u->d, width);
bn_sub_words(tmp->d, v->d, u->d, width);
bn_select_words(v->d, both_odd & u_less_than_v, tmp->d, v->d, width);
// At least one of |u| and |v| is now even.
BN_ULONG u_is_odd = word_is_odd_mask(u->d[0]);
BN_ULONG v_is_odd = word_is_odd_mask(v->d[0]);
assert(!(u_is_odd & v_is_odd));
// If both are even, the final GCD gains a factor of two.
shift += 1 & (~u_is_odd & ~v_is_odd);
// Halve any which are even.
maybe_rshift1_words(u->d, ~u_is_odd, tmp->d, width);
maybe_rshift1_words(v->d, ~v_is_odd, tmp->d, width);
}
// One of |u| or |v| is zero at this point. The algorithm usually makes |u|
// zero, unless |y| was already zero on input. Fix this by combining the
// values.
assert(BN_is_zero(u) || BN_is_zero(v));
for (size_t i = 0; i < width; i++) {
v->d[i] |= u->d[i];
}
*out_shift = shift;
ret = bn_set_words(r, v->d, width);
err:
BN_CTX_end(ctx);
return ret;
}
int BN_gcd(BIGNUM *r, const BIGNUM *x, const BIGNUM *y, BN_CTX *ctx) {
unsigned shift;
return bn_gcd_consttime(r, &shift, x, y, ctx) &&
BN_lshift(r, r, shift);
}
int bn_is_relatively_prime(int *out_relatively_prime, const BIGNUM *x,
const BIGNUM *y, BN_CTX *ctx) {
int ret = 0;
BN_CTX_start(ctx);
unsigned shift;
BIGNUM *gcd = BN_CTX_get(ctx);
if (gcd == NULL ||
!bn_gcd_consttime(gcd, &shift, x, y, ctx)) {
goto err;
}
// Check that 2^|shift| * |gcd| is one.
if (gcd->width == 0) {
*out_relatively_prime = 0;
} else {
BN_ULONG mask = shift | (gcd->d[0] ^ 1);
for (int i = 1; i < gcd->width; i++) {
mask |= gcd->d[i];
}
*out_relatively_prime = mask == 0;
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
int bn_lcm_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
BN_CTX_start(ctx);
unsigned shift;
BIGNUM *gcd = BN_CTX_get(ctx);
int ret = gcd != NULL &&
bn_mul_consttime(r, a, b, ctx) &&
bn_gcd_consttime(gcd, &shift, a, b, ctx) &&
bn_div_consttime(r, NULL, r, gcd, ctx) &&
bn_rshift_secret_shift(r, r, shift, ctx);
BN_CTX_end(ctx);
return ret;
}
int bn_mod_inverse_consttime(BIGNUM *r, int *out_no_inverse, const BIGNUM *a,
const BIGNUM *n, BN_CTX *ctx) {
*out_no_inverse = 0;
if (BN_is_negative(a) || BN_ucmp(a, n) >= 0) {
OPENSSL_PUT_ERROR(BN, BN_R_INPUT_NOT_REDUCED);
return 0;
}
if (BN_is_zero(a)) {
if (BN_is_one(n)) {
BN_zero(r);
return 1;
}
*out_no_inverse = 1;
OPENSSL_PUT_ERROR(BN, BN_R_NO_INVERSE);
return 0;
}
// This is a constant-time implementation of the extended binary GCD
// algorithm. It is adapted from the Handbook of Applied Cryptography, section
// 14.4.3, algorithm 14.51, and modified to bound coefficients and avoid
// negative numbers.
//
// For more details and proof of correctness, see
// https://github.com/mit-plv/fiat-crypto/pull/333. In particular, see |step|
// and |mod_inverse_consttime| for the algorithm in Gallina and see
// |mod_inverse_consttime_spec| for the correctness result.
if (!BN_is_odd(a) && !BN_is_odd(n)) {
*out_no_inverse = 1;
OPENSSL_PUT_ERROR(BN, BN_R_NO_INVERSE);
return 0;
}
// This function exists to compute the RSA private exponent, where |a| is one
// word. We'll thus use |a_width| when available.
size_t n_width = n->width, a_width = a->width;
if (a_width > n_width) {
a_width = n_width;
}
int ret = 0;
BN_CTX_start(ctx);
BIGNUM *u = BN_CTX_get(ctx);
BIGNUM *v = BN_CTX_get(ctx);
BIGNUM *A = BN_CTX_get(ctx);
BIGNUM *B = BN_CTX_get(ctx);
BIGNUM *C = BN_CTX_get(ctx);
BIGNUM *D = BN_CTX_get(ctx);
BIGNUM *tmp = BN_CTX_get(ctx);
BIGNUM *tmp2 = BN_CTX_get(ctx);
if (u == NULL || v == NULL || A == NULL || B == NULL || C == NULL ||
D == NULL || tmp == NULL || tmp2 == NULL ||
!BN_copy(u, a) ||
!BN_copy(v, n) ||
!BN_one(A) ||
!BN_one(D) ||
// For convenience, size |u| and |v| equivalently.
!bn_resize_words(u, n_width) ||
!bn_resize_words(v, n_width) ||
// |A| and |C| are bounded by |m|.
!bn_resize_words(A, n_width) ||
!bn_resize_words(C, n_width) ||
// |B| and |D| are bounded by |a|.
!bn_resize_words(B, a_width) ||
!bn_resize_words(D, a_width) ||
// |tmp| and |tmp2| may be used at either size.
!bn_resize_words(tmp, n_width) ||
!bn_resize_words(tmp2, n_width)) {
goto err;
}
// Each loop iteration halves at least one of |u| and |v|. Thus we need at
// most the combined bit width of inputs for at least one value to be zero.
unsigned a_bits = a_width * BN_BITS2, n_bits = n_width * BN_BITS2;
unsigned num_iters = a_bits + n_bits;
if (num_iters < a_bits) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
goto err;
}
// Before and after each loop iteration, the following hold:
//
// u = A*a - B*n
// v = D*n - C*a
// 0 < u <= a
// 0 <= v <= n
// 0 <= A < n
// 0 <= B <= a
// 0 <= C < n
// 0 <= D <= a
//
// After each loop iteration, u and v only get smaller, and at least one of
// them shrinks by at least a factor of two.
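//
// For example, a = 3 and n = 10 start from u = 3, v = 10, A = 1, B = 0,
// C = 0, D = 1, which satisfies u = A*a - B*n and v = D*n - C*a; the loop ends
// with u = 1 and A = 7, and indeed 3*7 = 21 == 1 (mod 10).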
for (unsigned i = 0; i < num_iters; i++) {
BN_ULONG both_odd = word_is_odd_mask(u->d[0]) & word_is_odd_mask(v->d[0]);
// If both |u| and |v| are odd, subtract the smaller from the larger.
BN_ULONG v_less_than_u =
(BN_ULONG)0 - bn_sub_words(tmp->d, v->d, u->d, n_width);
bn_select_words(v->d, both_odd & ~v_less_than_u, tmp->d, v->d, n_width);
bn_sub_words(tmp->d, u->d, v->d, n_width);
bn_select_words(u->d, both_odd & v_less_than_u, tmp->d, u->d, n_width);
// If we updated one of the values, update the corresponding coefficient.
BN_ULONG carry = bn_add_words(tmp->d, A->d, C->d, n_width);
carry -= bn_sub_words(tmp2->d, tmp->d, n->d, n_width);
bn_select_words(tmp->d, carry, tmp->d, tmp2->d, n_width);
bn_select_words(A->d, both_odd & v_less_than_u, tmp->d, A->d, n_width);
bn_select_words(C->d, both_odd & ~v_less_than_u, tmp->d, C->d, n_width);
bn_add_words(tmp->d, B->d, D->d, a_width);
bn_sub_words(tmp2->d, tmp->d, a->d, a_width);
bn_select_words(tmp->d, carry, tmp->d, tmp2->d, a_width);
bn_select_words(B->d, both_odd & v_less_than_u, tmp->d, B->d, a_width);
bn_select_words(D->d, both_odd & ~v_less_than_u, tmp->d, D->d, a_width);
// Our loop invariants hold at this point. Additionally, exactly one of |u|
// and |v| is now even.
BN_ULONG u_is_even = ~word_is_odd_mask(u->d[0]);
BN_ULONG v_is_even = ~word_is_odd_mask(v->d[0]);
assert(u_is_even != v_is_even);
// Halve the even one and adjust the corresponding coefficient.
maybe_rshift1_words(u->d, u_is_even, tmp->d, n_width);
BN_ULONG A_or_B_is_odd =
word_is_odd_mask(A->d[0]) | word_is_odd_mask(B->d[0]);
BN_ULONG A_carry =
maybe_add_words(A->d, A_or_B_is_odd & u_is_even, n->d, tmp->d, n_width);
BN_ULONG B_carry =
maybe_add_words(B->d, A_or_B_is_odd & u_is_even, a->d, tmp->d, a_width);
maybe_rshift1_words_carry(A->d, A_carry, u_is_even, tmp->d, n_width);
maybe_rshift1_words_carry(B->d, B_carry, u_is_even, tmp->d, a_width);
maybe_rshift1_words(v->d, v_is_even, tmp->d, n_width);
BN_ULONG C_or_D_is_odd =
word_is_odd_mask(C->d[0]) | word_is_odd_mask(D->d[0]);
BN_ULONG C_carry =
maybe_add_words(C->d, C_or_D_is_odd & v_is_even, n->d, tmp->d, n_width);
BN_ULONG D_carry =
maybe_add_words(D->d, C_or_D_is_odd & v_is_even, a->d, tmp->d, a_width);
maybe_rshift1_words_carry(C->d, C_carry, v_is_even, tmp->d, n_width);
maybe_rshift1_words_carry(D->d, D_carry, v_is_even, tmp->d, a_width);
}
assert(BN_is_zero(v));
if (!BN_is_one(u)) {
*out_no_inverse = 1;
OPENSSL_PUT_ERROR(BN, BN_R_NO_INVERSE);
goto err;
}
ret = BN_copy(r, A) != NULL;
err:
BN_CTX_end(ctx);
return ret;
}

View File

@@ -0,0 +1,711 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include "internal.h"
// This file has two other implementations: x86 assembly language in
// asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c.
#if defined(OPENSSL_NO_ASM) || \
!(defined(OPENSSL_X86) || \
(defined(OPENSSL_X86_64) && (defined(__GNUC__) || defined(__clang__))))
#ifdef BN_ULLONG
#define mul_add(r, a, w, c) \
do { \
BN_ULLONG t; \
t = (BN_ULLONG)(w) * (a) + (r) + (c); \
(r) = Lw(t); \
(c) = Hw(t); \
} while (0)
#define mul(r, a, w, c) \
do { \
BN_ULLONG t; \
t = (BN_ULLONG)(w) * (a) + (c); \
(r) = Lw(t); \
(c) = Hw(t); \
} while (0)
#define sqr(r0, r1, a) \
do { \
BN_ULLONG t; \
t = (BN_ULLONG)(a) * (a); \
(r0) = Lw(t); \
(r1) = Hw(t); \
} while (0)
#else
#define mul_add(r, a, w, c) \
do { \
BN_ULONG high, low, ret, tmp = (a); \
ret = (r); \
BN_UMULT_LOHI(low, high, w, tmp); \
ret += (c); \
(c) = (ret < (c)) ? 1 : 0; \
(c) += high; \
ret += low; \
(c) += (ret < low) ? 1 : 0; \
(r) = ret; \
} while (0)
#define mul(r, a, w, c) \
do { \
BN_ULONG high, low, ret, ta = (a); \
BN_UMULT_LOHI(low, high, w, ta); \
ret = low + (c); \
(c) = high; \
(c) += (ret < low) ? 1 : 0; \
(r) = ret; \
} while (0)
#define sqr(r0, r1, a) \
do { \
BN_ULONG tmp = (a); \
BN_UMULT_LOHI(r0, r1, tmp, tmp); \
} while (0)
#endif // !BN_ULLONG
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
BN_ULONG w) {
BN_ULONG c1 = 0;
if (num == 0) {
return c1;
}
while (num & ~3) {
mul_add(rp[0], ap[0], w, c1);
mul_add(rp[1], ap[1], w, c1);
mul_add(rp[2], ap[2], w, c1);
mul_add(rp[3], ap[3], w, c1);
ap += 4;
rp += 4;
num -= 4;
}
while (num) {
mul_add(rp[0], ap[0], w, c1);
ap++;
rp++;
num--;
}
return c1;
}
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
BN_ULONG w) {
BN_ULONG c1 = 0;
if (num == 0) {
return c1;
}
while (num & ~3) {
mul(rp[0], ap[0], w, c1);
mul(rp[1], ap[1], w, c1);
mul(rp[2], ap[2], w, c1);
mul(rp[3], ap[3], w, c1);
ap += 4;
rp += 4;
num -= 4;
}
while (num) {
mul(rp[0], ap[0], w, c1);
ap++;
rp++;
num--;
}
return c1;
}
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, size_t n) {
if (n == 0) {
return;
}
while (n & ~3) {
sqr(r[0], r[1], a[0]);
sqr(r[2], r[3], a[1]);
sqr(r[4], r[5], a[2]);
sqr(r[6], r[7], a[3]);
a += 4;
r += 8;
n -= 4;
}
while (n) {
sqr(r[0], r[1], a[0]);
a++;
r += 2;
n--;
}
}
#ifdef BN_ULLONG
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
size_t n) {
BN_ULLONG ll = 0;
if (n == 0) {
return 0;
}
while (n & ~3) {
ll += (BN_ULLONG)a[0] + b[0];
r[0] = (BN_ULONG)ll;
ll >>= BN_BITS2;
ll += (BN_ULLONG)a[1] + b[1];
r[1] = (BN_ULONG)ll;
ll >>= BN_BITS2;
ll += (BN_ULLONG)a[2] + b[2];
r[2] = (BN_ULONG)ll;
ll >>= BN_BITS2;
ll += (BN_ULLONG)a[3] + b[3];
r[3] = (BN_ULONG)ll;
ll >>= BN_BITS2;
a += 4;
b += 4;
r += 4;
n -= 4;
}
while (n) {
ll += (BN_ULLONG)a[0] + b[0];
r[0] = (BN_ULONG)ll;
ll >>= BN_BITS2;
a++;
b++;
r++;
n--;
}
return (BN_ULONG)ll;
}
#else // !BN_ULLONG
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
size_t n) {
BN_ULONG c, l, t;
if (n == 0) {
return (BN_ULONG)0;
}
c = 0;
while (n & ~3) {
t = a[0];
t += c;
c = (t < c);
l = t + b[0];
c += (l < t);
r[0] = l;
t = a[1];
t += c;
c = (t < c);
l = t + b[1];
c += (l < t);
r[1] = l;
t = a[2];
t += c;
c = (t < c);
l = t + b[2];
c += (l < t);
r[2] = l;
t = a[3];
t += c;
c = (t < c);
l = t + b[3];
c += (l < t);
r[3] = l;
a += 4;
b += 4;
r += 4;
n -= 4;
}
while (n) {
t = a[0];
t += c;
c = (t < c);
l = t + b[0];
c += (l < t);
r[0] = l;
a++;
b++;
r++;
n--;
}
return (BN_ULONG)c;
}
#endif // !BN_ULLONG
BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
size_t n) {
BN_ULONG t1, t2;
int c = 0;
if (n == 0) {
return (BN_ULONG)0;
}
while (n & ~3) {
t1 = a[0];
t2 = b[0];
r[0] = t1 - t2 - c;
if (t1 != t2) {
c = (t1 < t2);
}
t1 = a[1];
t2 = b[1];
r[1] = t1 - t2 - c;
if (t1 != t2) {
c = (t1 < t2);
}
t1 = a[2];
t2 = b[2];
r[2] = t1 - t2 - c;
if (t1 != t2) {
c = (t1 < t2);
}
t1 = a[3];
t2 = b[3];
r[3] = t1 - t2 - c;
if (t1 != t2) {
c = (t1 < t2);
}
a += 4;
b += 4;
r += 4;
n -= 4;
}
while (n) {
t1 = a[0];
t2 = b[0];
r[0] = t1 - t2 - c;
if (t1 != t2) {
c = (t1 < t2);
}
a++;
b++;
r++;
n--;
}
return c;
}
// mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0)
// mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0)
// sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0)
// sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)
#ifdef BN_ULLONG
// Keep in mind that additions to multiplication result can not overflow,
// because its high half cannot be all-ones.
#define mul_add_c(a, b, c0, c1, c2) \
do { \
BN_ULONG hi; \
BN_ULLONG t = (BN_ULLONG)(a) * (b); \
t += (c0); /* no carry */ \
(c0) = (BN_ULONG)Lw(t); \
hi = (BN_ULONG)Hw(t); \
(c1) += (hi); \
if ((c1) < hi) { \
(c2)++; \
} \
} while (0)
#define mul_add_c2(a, b, c0, c1, c2) \
do { \
BN_ULONG hi; \
BN_ULLONG t = (BN_ULLONG)(a) * (b); \
BN_ULLONG tt = t + (c0); /* no carry */ \
(c0) = (BN_ULONG)Lw(tt); \
hi = (BN_ULONG)Hw(tt); \
(c1) += hi; \
if ((c1) < hi) { \
(c2)++; \
} \
t += (c0); /* no carry */ \
(c0) = (BN_ULONG)Lw(t); \
hi = (BN_ULONG)Hw(t); \
(c1) += hi; \
if ((c1) < hi) { \
(c2)++; \
} \
} while (0)
#define sqr_add_c(a, i, c0, c1, c2) \
do { \
BN_ULONG hi; \
BN_ULLONG t = (BN_ULLONG)(a)[i] * (a)[i]; \
t += (c0); /* no carry */ \
(c0) = (BN_ULONG)Lw(t); \
hi = (BN_ULONG)Hw(t); \
(c1) += hi; \
if ((c1) < hi) { \
(c2)++; \
} \
} while (0)
#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
#else
// Keep in mind that additions to hi can not overflow, because the high word of
// a multiplication result cannot be all-ones.
#define mul_add_c(a, b, c0, c1, c2) \
do { \
BN_ULONG ta = (a), tb = (b); \
BN_ULONG lo, hi; \
BN_UMULT_LOHI(lo, hi, ta, tb); \
(c0) += lo; \
hi += ((c0) < lo) ? 1 : 0; \
(c1) += hi; \
(c2) += ((c1) < hi) ? 1 : 0; \
} while (0)
#define mul_add_c2(a, b, c0, c1, c2) \
do { \
BN_ULONG ta = (a), tb = (b); \
BN_ULONG lo, hi, tt; \
BN_UMULT_LOHI(lo, hi, ta, tb); \
(c0) += lo; \
tt = hi + (((c0) < lo) ? 1 : 0); \
(c1) += tt; \
(c2) += ((c1) < tt) ? 1 : 0; \
(c0) += lo; \
hi += (c0 < lo) ? 1 : 0; \
(c1) += hi; \
(c2) += ((c1) < hi) ? 1 : 0; \
} while (0)
#define sqr_add_c(a, i, c0, c1, c2) \
do { \
BN_ULONG ta = (a)[i]; \
BN_ULONG lo, hi; \
BN_UMULT_LOHI(lo, hi, ta, ta); \
(c0) += lo; \
hi += (c0 < lo) ? 1 : 0; \
(c1) += hi; \
(c2) += ((c1) < hi) ? 1 : 0; \
} while (0)
#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
#endif // !BN_ULLONG
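// The comba routines below walk the result column by column: column k of the
// product is the sum of all a[i]*b[j] with i + j == k, accumulated into the
// rotating three-word carry chain (c1, c2, c3) and then written to r[k].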
void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
mul_add_c(a[0], b[0], c1, c2, c3);
r[0] = c1;
c1 = 0;
mul_add_c(a[0], b[1], c2, c3, c1);
mul_add_c(a[1], b[0], c2, c3, c1);
r[1] = c2;
c2 = 0;
mul_add_c(a[2], b[0], c3, c1, c2);
mul_add_c(a[1], b[1], c3, c1, c2);
mul_add_c(a[0], b[2], c3, c1, c2);
r[2] = c3;
c3 = 0;
mul_add_c(a[0], b[3], c1, c2, c3);
mul_add_c(a[1], b[2], c1, c2, c3);
mul_add_c(a[2], b[1], c1, c2, c3);
mul_add_c(a[3], b[0], c1, c2, c3);
r[3] = c1;
c1 = 0;
mul_add_c(a[4], b[0], c2, c3, c1);
mul_add_c(a[3], b[1], c2, c3, c1);
mul_add_c(a[2], b[2], c2, c3, c1);
mul_add_c(a[1], b[3], c2, c3, c1);
mul_add_c(a[0], b[4], c2, c3, c1);
r[4] = c2;
c2 = 0;
mul_add_c(a[0], b[5], c3, c1, c2);
mul_add_c(a[1], b[4], c3, c1, c2);
mul_add_c(a[2], b[3], c3, c1, c2);
mul_add_c(a[3], b[2], c3, c1, c2);
mul_add_c(a[4], b[1], c3, c1, c2);
mul_add_c(a[5], b[0], c3, c1, c2);
r[5] = c3;
c3 = 0;
mul_add_c(a[6], b[0], c1, c2, c3);
mul_add_c(a[5], b[1], c1, c2, c3);
mul_add_c(a[4], b[2], c1, c2, c3);
mul_add_c(a[3], b[3], c1, c2, c3);
mul_add_c(a[2], b[4], c1, c2, c3);
mul_add_c(a[1], b[5], c1, c2, c3);
mul_add_c(a[0], b[6], c1, c2, c3);
r[6] = c1;
c1 = 0;
mul_add_c(a[0], b[7], c2, c3, c1);
mul_add_c(a[1], b[6], c2, c3, c1);
mul_add_c(a[2], b[5], c2, c3, c1);
mul_add_c(a[3], b[4], c2, c3, c1);
mul_add_c(a[4], b[3], c2, c3, c1);
mul_add_c(a[5], b[2], c2, c3, c1);
mul_add_c(a[6], b[1], c2, c3, c1);
mul_add_c(a[7], b[0], c2, c3, c1);
r[7] = c2;
c2 = 0;
mul_add_c(a[7], b[1], c3, c1, c2);
mul_add_c(a[6], b[2], c3, c1, c2);
mul_add_c(a[5], b[3], c3, c1, c2);
mul_add_c(a[4], b[4], c3, c1, c2);
mul_add_c(a[3], b[5], c3, c1, c2);
mul_add_c(a[2], b[6], c3, c1, c2);
mul_add_c(a[1], b[7], c3, c1, c2);
r[8] = c3;
c3 = 0;
mul_add_c(a[2], b[7], c1, c2, c3);
mul_add_c(a[3], b[6], c1, c2, c3);
mul_add_c(a[4], b[5], c1, c2, c3);
mul_add_c(a[5], b[4], c1, c2, c3);
mul_add_c(a[6], b[3], c1, c2, c3);
mul_add_c(a[7], b[2], c1, c2, c3);
r[9] = c1;
c1 = 0;
mul_add_c(a[7], b[3], c2, c3, c1);
mul_add_c(a[6], b[4], c2, c3, c1);
mul_add_c(a[5], b[5], c2, c3, c1);
mul_add_c(a[4], b[6], c2, c3, c1);
mul_add_c(a[3], b[7], c2, c3, c1);
r[10] = c2;
c2 = 0;
mul_add_c(a[4], b[7], c3, c1, c2);
mul_add_c(a[5], b[6], c3, c1, c2);
mul_add_c(a[6], b[5], c3, c1, c2);
mul_add_c(a[7], b[4], c3, c1, c2);
r[11] = c3;
c3 = 0;
mul_add_c(a[7], b[5], c1, c2, c3);
mul_add_c(a[6], b[6], c1, c2, c3);
mul_add_c(a[5], b[7], c1, c2, c3);
r[12] = c1;
c1 = 0;
mul_add_c(a[6], b[7], c2, c3, c1);
mul_add_c(a[7], b[6], c2, c3, c1);
r[13] = c2;
c2 = 0;
mul_add_c(a[7], b[7], c3, c1, c2);
r[14] = c3;
r[15] = c1;
}
void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
mul_add_c(a[0], b[0], c1, c2, c3);
r[0] = c1;
c1 = 0;
mul_add_c(a[0], b[1], c2, c3, c1);
mul_add_c(a[1], b[0], c2, c3, c1);
r[1] = c2;
c2 = 0;
mul_add_c(a[2], b[0], c3, c1, c2);
mul_add_c(a[1], b[1], c3, c1, c2);
mul_add_c(a[0], b[2], c3, c1, c2);
r[2] = c3;
c3 = 0;
mul_add_c(a[0], b[3], c1, c2, c3);
mul_add_c(a[1], b[2], c1, c2, c3);
mul_add_c(a[2], b[1], c1, c2, c3);
mul_add_c(a[3], b[0], c1, c2, c3);
r[3] = c1;
c1 = 0;
mul_add_c(a[3], b[1], c2, c3, c1);
mul_add_c(a[2], b[2], c2, c3, c1);
mul_add_c(a[1], b[3], c2, c3, c1);
r[4] = c2;
c2 = 0;
mul_add_c(a[2], b[3], c3, c1, c2);
mul_add_c(a[3], b[2], c3, c1, c2);
r[5] = c3;
c3 = 0;
mul_add_c(a[3], b[3], c1, c2, c3);
r[6] = c1;
r[7] = c2;
}
void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
sqr_add_c(a, 0, c1, c2, c3);
r[0] = c1;
c1 = 0;
sqr_add_c2(a, 1, 0, c2, c3, c1);
r[1] = c2;
c2 = 0;
sqr_add_c(a, 1, c3, c1, c2);
sqr_add_c2(a, 2, 0, c3, c1, c2);
r[2] = c3;
c3 = 0;
sqr_add_c2(a, 3, 0, c1, c2, c3);
sqr_add_c2(a, 2, 1, c1, c2, c3);
r[3] = c1;
c1 = 0;
sqr_add_c(a, 2, c2, c3, c1);
sqr_add_c2(a, 3, 1, c2, c3, c1);
sqr_add_c2(a, 4, 0, c2, c3, c1);
r[4] = c2;
c2 = 0;
sqr_add_c2(a, 5, 0, c3, c1, c2);
sqr_add_c2(a, 4, 1, c3, c1, c2);
sqr_add_c2(a, 3, 2, c3, c1, c2);
r[5] = c3;
c3 = 0;
sqr_add_c(a, 3, c1, c2, c3);
sqr_add_c2(a, 4, 2, c1, c2, c3);
sqr_add_c2(a, 5, 1, c1, c2, c3);
sqr_add_c2(a, 6, 0, c1, c2, c3);
r[6] = c1;
c1 = 0;
sqr_add_c2(a, 7, 0, c2, c3, c1);
sqr_add_c2(a, 6, 1, c2, c3, c1);
sqr_add_c2(a, 5, 2, c2, c3, c1);
sqr_add_c2(a, 4, 3, c2, c3, c1);
r[7] = c2;
c2 = 0;
sqr_add_c(a, 4, c3, c1, c2);
sqr_add_c2(a, 5, 3, c3, c1, c2);
sqr_add_c2(a, 6, 2, c3, c1, c2);
sqr_add_c2(a, 7, 1, c3, c1, c2);
r[8] = c3;
c3 = 0;
sqr_add_c2(a, 7, 2, c1, c2, c3);
sqr_add_c2(a, 6, 3, c1, c2, c3);
sqr_add_c2(a, 5, 4, c1, c2, c3);
r[9] = c1;
c1 = 0;
sqr_add_c(a, 5, c2, c3, c1);
sqr_add_c2(a, 6, 4, c2, c3, c1);
sqr_add_c2(a, 7, 3, c2, c3, c1);
r[10] = c2;
c2 = 0;
sqr_add_c2(a, 7, 4, c3, c1, c2);
sqr_add_c2(a, 6, 5, c3, c1, c2);
r[11] = c3;
c3 = 0;
sqr_add_c(a, 6, c1, c2, c3);
sqr_add_c2(a, 7, 5, c1, c2, c3);
r[12] = c1;
c1 = 0;
sqr_add_c2(a, 7, 6, c2, c3, c1);
r[13] = c2;
c2 = 0;
sqr_add_c(a, 7, c3, c1, c2);
r[14] = c3;
r[15] = c1;
}
void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
sqr_add_c(a, 0, c1, c2, c3);
r[0] = c1;
c1 = 0;
sqr_add_c2(a, 1, 0, c2, c3, c1);
r[1] = c2;
c2 = 0;
sqr_add_c(a, 1, c3, c1, c2);
sqr_add_c2(a, 2, 0, c3, c1, c2);
r[2] = c3;
c3 = 0;
sqr_add_c2(a, 3, 0, c1, c2, c3);
sqr_add_c2(a, 2, 1, c1, c2, c3);
r[3] = c1;
c1 = 0;
sqr_add_c(a, 2, c2, c3, c1);
sqr_add_c2(a, 3, 1, c2, c3, c1);
r[4] = c2;
c2 = 0;
sqr_add_c2(a, 3, 2, c3, c1, c2);
r[5] = c3;
c3 = 0;
sqr_add_c(a, 3, c1, c2, c3);
r[6] = c1;
r[7] = c2;
}
#undef mul_add
#undef mul
#undef sqr
#undef mul_add_c
#undef mul_add_c2
#undef sqr_add_c
#undef sqr_add_c2
#endif

View File

@@ -0,0 +1,694 @@
/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/
/* ====================================================================
* Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com).
*
*/
/* ====================================================================
* Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
*
* Portions of the attached software ("Contribution") are developed by
* SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project.
*
* The Contribution is licensed pursuant to the Eric Young open source
* license provided above.
*
* The binary polynomial arithmetic software is originally written by
* Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems
* Laboratories. */
#ifndef OPENSSL_HEADER_BN_INTERNAL_H
#define OPENSSL_HEADER_BN_INTERNAL_H
#include <CBigNumBoringSSL_base.h>
#if defined(OPENSSL_X86_64) && defined(_MSC_VER)
OPENSSL_MSVC_PRAGMA(warning(push, 3))
#include <intrin.h>
OPENSSL_MSVC_PRAGMA(warning(pop))
#pragma intrinsic(__umulh, _umul128)
#endif
#include "../../internal.h"
#if defined(__cplusplus)
extern "C" {
#endif
#if defined(OPENSSL_64_BIT)
#if defined(BORINGSSL_HAS_UINT128)
// MSVC doesn't support two-word integers on 64-bit.
#define BN_ULLONG uint128_t
#if defined(BORINGSSL_CAN_DIVIDE_UINT128)
#define BN_CAN_DIVIDE_ULLONG
#endif
#endif
#define BN_BITS2 64
#define BN_BYTES 8
#define BN_BITS4 32
#define BN_MASK2 (0xffffffffffffffffUL)
#define BN_MASK2l (0xffffffffUL)
#define BN_MASK2h (0xffffffff00000000UL)
#define BN_MASK2h1 (0xffffffff80000000UL)
#define BN_MONT_CTX_N0_LIMBS 1
#define BN_DEC_CONV (10000000000000000000UL)
#define BN_DEC_NUM 19
#define TOBN(hi, lo) ((BN_ULONG)(hi) << 32 | (lo))
#elif defined(OPENSSL_32_BIT)
#define BN_ULLONG uint64_t
#define BN_CAN_DIVIDE_ULLONG
#define BN_BITS2 32
#define BN_BYTES 4
#define BN_BITS4 16
#define BN_MASK2 (0xffffffffUL)
#define BN_MASK2l (0xffffUL)
#define BN_MASK2h1 (0xffff8000UL)
#define BN_MASK2h (0xffff0000UL)
// On some 32-bit platforms, Montgomery multiplication is done using 64-bit
// arithmetic with SIMD instructions. On such platforms, |BN_MONT_CTX::n0|
// needs to be two words long. Only certain 32-bit platforms actually make use
// of n0[1], and a shorter R value would suffice for the others. However,
// currently only the assembly files know which is which.
#define BN_MONT_CTX_N0_LIMBS 2
#define BN_DEC_CONV (1000000000UL)
#define BN_DEC_NUM 9
#define TOBN(hi, lo) (lo), (hi)
#else
#error "Must define either OPENSSL_32_BIT or OPENSSL_64_BIT"
#endif
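// In either case, TOBN(hi, lo) spells out the 64-bit value hi*2^32 + lo as
// BN_ULONG array initializers, least significant word first: on 64-bit it is a
// single word, on 32-bit it expands to the two words (lo), (hi).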
#if !defined(OPENSSL_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
#define BN_CAN_USE_INLINE_ASM
#endif
// |BN_mod_exp_mont_consttime| is based on the assumption that the L1 data
// cache line width of the target processor is at least the following value.
#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH 64
// The number of |BN_ULONG|s needed for the |BN_mod_exp_mont_consttime| stack-
// allocated storage buffer. The buffer is just the right size for the RSAZ
// and is about ~1KB larger than what's necessary (4480 bytes) for 1024-bit
// inputs.
#define MOD_EXP_CTIME_STORAGE_LEN \
(((320u * 3u) + (32u * 9u * 16u)) / sizeof(BN_ULONG))
#define STATIC_BIGNUM(x) \
{ \
(BN_ULONG *)(x), sizeof(x) / sizeof(BN_ULONG), \
sizeof(x) / sizeof(BN_ULONG), 0, BN_FLG_STATIC_DATA \
}
#if defined(BN_ULLONG)
#define Lw(t) ((BN_ULONG)(t))
#define Hw(t) ((BN_ULONG)((t) >> BN_BITS2))
#endif
// bn_minimal_width returns the minimal value of |bn->top| which fits the
// value of |bn|.
int bn_minimal_width(const BIGNUM *bn);
// bn_set_minimal_width sets |bn->width| to |bn_minimal_width(bn)|. If |bn| is
// zero, |bn->neg| is set to zero.
void bn_set_minimal_width(BIGNUM *bn);
// bn_wexpand ensures that |bn| has at least |words| words of space without
// altering its value. It returns one on success or zero on allocation
// failure.
int bn_wexpand(BIGNUM *bn, size_t words);
// bn_expand acts the same as |bn_wexpand|, but takes a number of bits rather
// than a number of words.
int bn_expand(BIGNUM *bn, size_t bits);
// bn_resize_words adjusts |bn->top| to be |words|. It returns one on success
// and zero on allocation error or if |bn|'s value is too large.
OPENSSL_EXPORT int bn_resize_words(BIGNUM *bn, size_t words);
// bn_select_words sets |r| to |a| if |mask| is all ones or |b| if |mask| is
// all zeros.
void bn_select_words(BN_ULONG *r, BN_ULONG mask, const BN_ULONG *a,
const BN_ULONG *b, size_t num);
// bn_set_words sets |bn| to the value encoded in the |num| words in |words|,
// least significant word first.
int bn_set_words(BIGNUM *bn, const BN_ULONG *words, size_t num);
// bn_fits_in_words returns one if |bn| may be represented in |num| words, plus
// a sign bit, and zero otherwise.
int bn_fits_in_words(const BIGNUM *bn, size_t num);
// bn_copy_words copies the value of |bn| to |out| and returns one if the value
// is representable in |num| words. Otherwise, it returns zero.
int bn_copy_words(BN_ULONG *out, size_t num, const BIGNUM *bn);
// bn_mul_add_words multiplies |ap| by |w|, adds the result to |rp|, and places
// the result in |rp|. |ap| and |rp| must both be |num| words long. It returns
// the carry word of the operation. |ap| and |rp| may be equal but otherwise may
// not alias.
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
BN_ULONG w);
// bn_mul_words multiplies |ap| by |w| and places the result in |rp|. |ap| and
// |rp| must both be |num| words long. It returns the carry word of the
// operation. |ap| and |rp| may be equal but otherwise may not alias.
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num, BN_ULONG w);
// bn_sqr_words sets |rp[2*i]| and |rp[2*i+1]| to |ap[i]|'s square, for all |i|
// up to |num|. |ap| is an array of |num| words and |rp| an array of |2*num|
// words. |ap| and |rp| may not alias.
//
// This gives the contribution of the |ap[i]*ap[i]| terms when squaring |ap|.
void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num);
// bn_add_words adds |ap| to |bp| and places the result in |rp|, each of which
// are |num| words long. It returns the carry bit, which is one if the operation
// overflowed and zero otherwise. Any pair of |ap|, |bp|, and |rp| may be equal
// to each other but otherwise may not alias.
BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
size_t num);
// bn_sub_words subtracts |bp| from |ap| and places the result in |rp|. It
// returns the borrow bit, which is one if the computation underflowed and zero
// otherwise. Any pair of |ap|, |bp|, and |rp| may be equal to each other but
// otherwise may not alias.
BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
size_t num);
// bn_mul_comba4 sets |r| to the product of |a| and |b|.
void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]);
// bn_mul_comba8 sets |r| to the product of |a| and |b|.
void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]);
// bn_sqr_comba8 sets |r| to |a|^2.
void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]);
// bn_sqr_comba4 sets |r| to |a|^2.
void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]);
// bn_less_than_words returns one if |a| < |b| and zero otherwise, where |a|
// and |b| both are |len| words long. It runs in constant time.
int bn_less_than_words(const BN_ULONG *a, const BN_ULONG *b, size_t len);
// bn_in_range_words returns one if |min_inclusive| <= |a| < |max_exclusive|,
// where |a| and |max_exclusive| both are |len| words long. |a| and
// |max_exclusive| are treated as secret.
int bn_in_range_words(const BN_ULONG *a, BN_ULONG min_inclusive,
const BN_ULONG *max_exclusive, size_t len);
// bn_rand_range_words sets |out| to a uniformly distributed random number from
// |min_inclusive| to |max_exclusive|. Both |out| and |max_exclusive| are |len|
// words long.
//
// This function runs in time independent of the result, but |min_inclusive| and
// |max_exclusive| are public data. (Information about the range is unavoidably
// leaked by how many iterations it took to select a number.)
int bn_rand_range_words(BN_ULONG *out, BN_ULONG min_inclusive,
const BN_ULONG *max_exclusive, size_t len,
const uint8_t additional_data[32]);
// bn_rand_secret_range behaves like |BN_rand_range_ex|, but treats
// |max_exclusive| as secret. Because of this constraint, the distribution of
// values returned is more complex.
//
// Rather than repeatedly generating values until one is in range, which would
// leak information, it generates one value. If the value is in range, it sets
// |*out_is_uniform| to one. Otherwise, it sets |*out_is_uniform| to zero,
// fixing up the value to force it in range.
//
// The subset of calls to |bn_rand_secret_range| which set |*out_is_uniform| to
// one are uniformly distributed in the target range. Calls overall are not.
// This function is intended for use in situations where the extra values are
// still usable and where the number of iterations needed to reach the target
// number of uniform outputs may be blinded for negligible probabilities of
// timing leaks.
//
// Although this function treats |max_exclusive| as secret, it treats the number
// of bits in |max_exclusive| as public.
int bn_rand_secret_range(BIGNUM *r, int *out_is_uniform, BN_ULONG min_inclusive,
const BIGNUM *max_exclusive);
#if !defined(OPENSSL_NO_ASM) && \
(defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
#define OPENSSL_BN_ASM_MONT
// bn_mul_mont writes |ap| * |bp| mod |np| to |rp|, each |num| words
// long. Inputs and outputs are in Montgomery form. |n0| is a pointer to the
// corresponding field in |BN_MONT_CTX|. It returns one if |bn_mul_mont| handles
// inputs of this size and zero otherwise.
//
// TODO(davidben): The x86_64 implementation expects a 32-bit input and masks
// off upper bits. The aarch64 implementation expects a 64-bit input and does
// not. |size_t| is the safer option but not strictly correct for x86_64. But
// this function implicitly already has a bound on the size of |num| because it
// internally creates |num|-sized stack allocation.
//
// See also discussion in |ToWord| in abi_test.h for notes on smaller-than-word
// inputs.
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
#define OPENSSL_BN_ASM_MONT5
// bn_mul_mont_gather5 loads index |power| of |table|, multiplies it
// by |ap| modulo |np|, and stores the result in |rp|. The values are |num|
// words long and represented in Montgomery form. |n0| is a pointer to the
// corresponding field in |BN_MONT_CTX|.
void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
const BN_ULONG *table, const BN_ULONG *np,
const BN_ULONG *n0, int num, int power);
// bn_scatter5 stores |inp| to index |power| of |table|. |inp| and each entry of
// |table| are |num| words long. |power| must be less than 32. |table| must be
// 32*|num| words long.
void bn_scatter5(const BN_ULONG *inp, size_t num, BN_ULONG *table,
size_t power);
// bn_gather5 loads index |power| of |table| and stores it in |out|. |out| and
// each entry of |table| are |num| words long. |power| must be less than 32.
void bn_gather5(BN_ULONG *out, size_t num, BN_ULONG *table, size_t power);
// bn_power5 squares |ap| five times and multiplies it by the value stored at
// index |power| of |table|, modulo |np|. It stores the result in |rp|. The
// values are |num| words long and represented in Montgomery form. |n0| is a
// pointer to the corresponding field in |BN_MONT_CTX|. |num| must be divisible
// by 8.
void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table,
const BN_ULONG *np, const BN_ULONG *n0, int num, int power);
// bn_from_montgomery converts |ap| from Montgomery form modulo |np| and writes
// the result in |rp|, each of which is |num| words long. It returns one on
// success and zero if it cannot handle inputs of length |num|. |n0| is a
// pointer to the corresponding field in |BN_MONT_CTX|.
int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
const BN_ULONG *not_used, const BN_ULONG *np,
const BN_ULONG *n0, int num);
#endif // !OPENSSL_NO_ASM && OPENSSL_X86_64
uint64_t bn_mont_n0(const BIGNUM *n);
// bn_mod_exp_base_2_consttime calculates r = 2**p (mod n). |p| must be larger
// than log_2(n); i.e. 2**p must be larger than |n|. |n| must be positive and
// odd. |p| and the bit width of |n| are assumed public, but |n| is otherwise
// treated as secret.
int bn_mod_exp_base_2_consttime(BIGNUM *r, unsigned p, const BIGNUM *n,
BN_CTX *ctx);
#if defined(OPENSSL_X86_64) && defined(_MSC_VER)
#define BN_UMULT_LOHI(low, high, a, b) ((low) = _umul128((a), (b), &(high)))
#endif
#if !defined(BN_ULLONG) && !defined(BN_UMULT_LOHI)
#error "Either BN_ULLONG or BN_UMULT_LOHI must be defined on every platform."
#endif
// bn_jacobi returns the Jacobi symbol of |a| and |b| (which is -1, 0 or 1), or
// -2 on error.
int bn_jacobi(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
// bn_is_bit_set_words returns one if bit |bit| is set in |a| and zero
// otherwise.
int bn_is_bit_set_words(const BN_ULONG *a, size_t num, unsigned bit);
// bn_one_to_montgomery sets |r| to one in Montgomery form. It returns one on
// success and zero on error. This function treats the bit width of the modulus
// as public.
int bn_one_to_montgomery(BIGNUM *r, const BN_MONT_CTX *mont, BN_CTX *ctx);
// bn_less_than_montgomery_R returns one if |bn| is less than the Montgomery R
// value for |mont| and zero otherwise.
int bn_less_than_montgomery_R(const BIGNUM *bn, const BN_MONT_CTX *mont);
// bn_mod_u16_consttime returns |bn| mod |d|, ignoring |bn|'s sign bit. It runs
// in time independent of the value of |bn|, but it treats |d| as public.
OPENSSL_EXPORT uint16_t bn_mod_u16_consttime(const BIGNUM *bn, uint16_t d);
// bn_odd_number_is_obviously_composite returns one if |bn| is divisible by one
// of the first several odd primes and zero otherwise.
int bn_odd_number_is_obviously_composite(const BIGNUM *bn);
// A BN_MILLER_RABIN stores state common to each Miller-Rabin iteration. It is
// initialized within an existing |BN_CTX| scope and may not be used after
// that scope is released with |BN_CTX_end|. Field names match those in FIPS
// 186-4, section C.3.1.
typedef struct {
// w1 is w-1.
BIGNUM *w1;
// m is (w-1)/2^a.
BIGNUM *m;
// one_mont is 1 (mod w) in Montgomery form.
BIGNUM *one_mont;
// w1_mont is w-1 (mod w) in Montgomery form.
BIGNUM *w1_mont;
// w_bits is BN_num_bits(w).
int w_bits;
// a is the largest integer such that 2^a divides w-1.
int a;
} BN_MILLER_RABIN;
// bn_miller_rabin_init initializes |miller_rabin| for testing if |mont->N| is
// prime. It returns one on success and zero on error.
OPENSSL_EXPORT int bn_miller_rabin_init(BN_MILLER_RABIN *miller_rabin,
const BN_MONT_CTX *mont, BN_CTX *ctx);
// bn_miller_rabin_iteration performs one Miller-Rabin iteration, checking if
// |b| is a composite witness for |mont->N|. |miller_rabin| must have been
// initialized with |bn_miller_rabin_init|. On success, it returns one and sets
// |*out_is_possibly_prime| to one if |mont->N| may still be prime or zero if
// |b| shows it is composite. On allocation or internal failure, it returns
// zero.
OPENSSL_EXPORT int bn_miller_rabin_iteration(
const BN_MILLER_RABIN *miller_rabin, int *out_is_possibly_prime,
const BIGNUM *b, const BN_MONT_CTX *mont, BN_CTX *ctx);
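// Illustrative usage sketch (comment only, not part of the upstream header):
// a caller testing the modulus wrapped by |mont| would drive the two functions
// above roughly as follows, drawing a fresh random base |b| from [2, w-2] on
// each pass; |iterations| and the error paths are the caller's concern.
//
//   BN_MILLER_RABIN miller_rabin;
//   if (!bn_miller_rabin_init(&miller_rabin, mont, ctx)) {
//     goto err;
//   }
//   for (int i = 0; i < iterations; i++) {
//     int possibly_prime;
//     // ... pick a random |b| here ...
//     if (!bn_miller_rabin_iteration(&miller_rabin, &possibly_prime, b, mont,
//                                    ctx)) {
//       goto err;
//     }
//     if (!possibly_prime) {
//       break;  // |mont->N| is composite.
//     }
//   }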
// bn_rshift1_words sets |r| to |a| >> 1, where both arrays are |num| words wide.
void bn_rshift1_words(BN_ULONG *r, const BN_ULONG *a, size_t num);
// bn_rshift_words sets |r| to |a| >> |shift|, where both arrays are |num| words
// wide.
void bn_rshift_words(BN_ULONG *r, const BN_ULONG *a, unsigned shift,
size_t num);
// bn_rshift_secret_shift behaves like |BN_rshift| but runs in time independent
// of both |a| and |n|.
OPENSSL_EXPORT int bn_rshift_secret_shift(BIGNUM *r, const BIGNUM *a,
unsigned n, BN_CTX *ctx);
// bn_reduce_once sets |r| to |a| mod |m| where 0 <= |a| < 2*|m|. It returns
// zero if |a| < |m| and a mask of all ones if |a| >= |m|. Each array is |num|
// words long, but |a| has an additional word specified by |carry|. |carry| must
// be zero or one, as implied by the bounds on |a|.
//
// |r|, |a|, and |m| may not alias. Use |bn_reduce_once_in_place| if |r| and |a|
// must alias.
BN_ULONG bn_reduce_once(BN_ULONG *r, const BN_ULONG *a, BN_ULONG carry,
const BN_ULONG *m, size_t num);
// bn_reduce_once_in_place behaves like |bn_reduce_once| but acts in-place on
// |r|, using |tmp| as scratch space. |r|, |tmp|, and |m| may not alias.
BN_ULONG bn_reduce_once_in_place(BN_ULONG *r, BN_ULONG carry, const BN_ULONG *m,
BN_ULONG *tmp, size_t num);
// Constant-time non-modular arithmetic.
//
// The following functions implement non-modular arithmetic in constant-time
// and pessimally set |r->width| to the largest possible word size.
//
// Note this means that, e.g., repeatedly multiplying by one will cause widths
// to increase without bound. The corresponding public API functions minimize
// their outputs to avoid regressing calculator consumers.
// bn_uadd_consttime behaves like |BN_uadd|, but it pessimally sets
// |r->width| = |a->width| + |b->width| + 1.
int bn_uadd_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
// bn_usub_consttime behaves like |BN_usub|, but it pessimally sets
// |r->width| = |a->width|.
int bn_usub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
// bn_abs_sub_consttime sets |r| to the absolute value of |a| - |b|, treating
// both inputs as secret. It returns one on success and zero on error.
OPENSSL_EXPORT int bn_abs_sub_consttime(BIGNUM *r, const BIGNUM *a,
const BIGNUM *b, BN_CTX *ctx);
// bn_mul_consttime behaves like |BN_mul|, but it rejects negative inputs and
// pessimally sets |r->width| to |a->width| + |b->width|, to avoid leaking
// information about |a| and |b|.
int bn_mul_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
// bn_sqr_consttime behaves like |BN_sqr|, but it pessimally sets |r->width|
// to 2*|a->width|, to avoid leaking information about |a|.
int bn_sqr_consttime(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx);
// bn_div_consttime behaves like |BN_div|, but it rejects negative inputs and
// treats both inputs, including their magnitudes, as secret. It is, as a
// result, much slower than |BN_div| and should only be used for rare operations
// where Montgomery reduction is not available.
//
// Note that |quotient->width| will be set pessimally to |numerator->width|.
OPENSSL_EXPORT int bn_div_consttime(BIGNUM *quotient, BIGNUM *remainder,
const BIGNUM *numerator,
const BIGNUM *divisor, BN_CTX *ctx);
// bn_is_relatively_prime checks whether GCD(|x|, |y|) is one. On success, it
// returns one and sets |*out_relatively_prime| to one if the GCD was one and
// zero otherwise. On error, it returns zero.
OPENSSL_EXPORT int bn_is_relatively_prime(int *out_relatively_prime,
const BIGNUM *x, const BIGNUM *y,
BN_CTX *ctx);
// bn_lcm_consttime sets |r| to LCM(|a|, |b|). It returns one on success and
// zero on error. |a| and |b| are both treated as secret.
OPENSSL_EXPORT int bn_lcm_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
BN_CTX *ctx);
// Constant-time modular arithmetic.
//
// The following functions implement basic constant-time modular arithmetic.
// bn_mod_add_words sets |r| to |a| + |b| (mod |m|), using |tmp| as scratch
// space. Each array is |num| words long. |a| and |b| must be < |m|. Any pair of
// |r|, |a|, and |b| may alias.
void bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
const BN_ULONG *m, BN_ULONG *tmp, size_t num);
// bn_mod_add_consttime acts like |BN_mod_add_quick| but takes a |BN_CTX|.
int bn_mod_add_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BIGNUM *m, BN_CTX *ctx);
// bn_mod_sub_words sets |r| to |a| - |b| (mod |m|), using |tmp| as scratch
// space. Each array is |num| words long. |a| and |b| must be < |m|. Any pair of
// |r|, |a|, and |b| may alias.
void bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
const BN_ULONG *m, BN_ULONG *tmp, size_t num);
// bn_mod_sub_consttime acts like |BN_mod_sub_quick| but takes a |BN_CTX|.
int bn_mod_sub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BIGNUM *m, BN_CTX *ctx);
// bn_mod_lshift1_consttime acts like |BN_mod_lshift1_quick| but takes a
// |BN_CTX|.
int bn_mod_lshift1_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *m,
BN_CTX *ctx);
// bn_mod_lshift_consttime acts like |BN_mod_lshift_quick| but takes a |BN_CTX|.
int bn_mod_lshift_consttime(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m,
BN_CTX *ctx);
// bn_mod_inverse_consttime sets |r| to |a|^-1, mod |n|. |a| must be non-
// negative and less than |n|. It returns one on success and zero on error. On
// failure, if the failure was caused by |a| having no inverse mod |n| then
// |*out_no_inverse| will be set to one; otherwise it will be set to zero.
//
// This function treats both |a| and |n| as secret, provided they are both non-
// zero and the inverse exists. It should only be used for even moduli where
// none of the less general implementations are applicable.
OPENSSL_EXPORT int bn_mod_inverse_consttime(BIGNUM *r, int *out_no_inverse,
const BIGNUM *a, const BIGNUM *n,
BN_CTX *ctx);
// bn_mod_inverse_prime sets |out| to the modular inverse of |a| modulo |p|,
// computed with Fermat's Little Theorem. It returns one on success and zero on
// error. If |mont_p| is NULL, one will be computed temporarily.
int bn_mod_inverse_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
BN_CTX *ctx, const BN_MONT_CTX *mont_p);
// bn_mod_inverse_secret_prime behaves like |bn_mod_inverse_prime| but uses
// |BN_mod_exp_mont_consttime| instead of |BN_mod_exp_mont| in hopes of
// protecting the exponent.
int bn_mod_inverse_secret_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
BN_CTX *ctx, const BN_MONT_CTX *mont_p);
// Low-level operations for small numbers.
//
// The following functions implement algorithms suitable for use with scalars
// and field elements in elliptic curves. They rely on the number being small
// both to stack-allocate various temporaries and because they do not implement
// optimizations useful for the larger values used in RSA.
// BN_SMALL_MAX_WORDS is the largest size input these functions handle. This
// limit allows temporaries to be more easily stack-allocated. This limit is set
// to accommodate P-521.
#if defined(OPENSSL_32_BIT)
#define BN_SMALL_MAX_WORDS 17
#else
#define BN_SMALL_MAX_WORDS 9
#endif
// bn_mul_small sets |r| to |a|*|b|. |num_r| must be |num_a| + |num_b|. |r| may
// not alias with |a| or |b|.
void bn_mul_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a,
const BN_ULONG *b, size_t num_b);
// bn_sqr_small sets |r| to |a|^2. |num_a| must be at most |BN_SMALL_MAX_WORDS|.
// |num_r| must be |num_a|*2. |r| and |a| may not alias.
void bn_sqr_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a);
// In the following functions, the modulus must be at most |BN_SMALL_MAX_WORDS|
// words long.
// bn_to_montgomery_small sets |r| to |a| translated to the Montgomery domain.
// |r| and |a| are |num| words long, which must be |mont->N.width|. |a| must be
// fully reduced and may alias |r|.
void bn_to_montgomery_small(BN_ULONG *r, const BN_ULONG *a, size_t num,
const BN_MONT_CTX *mont);
// bn_from_montgomery_small sets |r| to |a| translated out of the Montgomery
// domain. |r| and |a| are |num_r| and |num_a| words long, respectively. |num_r|
// must be |mont->N.width|. |a| must be at most |mont->N|^2 and may alias |r|.
//
// Unlike most of these functions, only |num_r| is bounded by
// |BN_SMALL_MAX_WORDS|. |num_a| may exceed it, but must be at most 2 * |num_r|.
void bn_from_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
size_t num_a, const BN_MONT_CTX *mont);
// bn_mod_mul_montgomery_small sets |r| to |a| * |b| mod |mont->N|. Both inputs
// and outputs are in the Montgomery domain. Each array is |num| words long,
// which must be |mont->N.width|. Any two of |r|, |a|, and |b| may alias. |a|
// and |b| must be reduced on input.
void bn_mod_mul_montgomery_small(BN_ULONG *r, const BN_ULONG *a,
const BN_ULONG *b, size_t num,
const BN_MONT_CTX *mont);
// bn_mod_exp_mont_small sets |r| to |a|^|p| mod |mont->N|. It returns one on
// success and zero on programmer or internal error. Both inputs and outputs are
// in the Montgomery domain. |r| and |a| are |num| words long, which must be
// |mont->N.width| and at most |BN_SMALL_MAX_WORDS|. |a| must be fully-reduced.
// This function runs in time independent of |a|, but |p| and |mont->N| are
// public values. |a| must be fully-reduced and may alias with |r|.
//
// Note this function differs from |BN_mod_exp_mont| which uses Montgomery
// reduction but takes input and output outside the Montgomery domain. Combine
// this function with |bn_from_montgomery_small| and |bn_to_montgomery_small|
// if necessary.
void bn_mod_exp_mont_small(BN_ULONG *r, const BN_ULONG *a, size_t num,
const BN_ULONG *p, size_t num_p,
const BN_MONT_CTX *mont);
// bn_mod_inverse0_prime_mont_small sets |r| to |a|^-1 mod |mont->N|. If |a| is
// zero, |r| is set to zero. |mont->N| must be a prime. |r| and |a| are |num|
// words long, which must be |mont->N.width| and at most |BN_SMALL_MAX_WORDS|.
// |a| must be fully-reduced and may alias |r|. This function runs in time
// independent of |a|, but |mont->N| is a public value.
void bn_mod_inverse0_prime_mont_small(BN_ULONG *r, const BN_ULONG *a,
size_t num, const BN_MONT_CTX *mont);
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_BN_INTERNAL_H

View File

@ -0,0 +1,146 @@
/* ====================================================================
* Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_bn.h>
#include <CBigNumBoringSSL_err.h>
#include "internal.h"
// least significant word
#define BN_lsw(n) (((n)->width == 0) ? (BN_ULONG) 0 : (n)->d[0])
int bn_jacobi(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
// In 'tab', only odd-indexed entries are relevant:
// For any odd BIGNUM n,
// tab[BN_lsw(n) & 7]
// is $(-1)^{(n^2-1)/8}$ (using TeX notation).
// Note that the sign of n does not matter.
static const int tab[8] = {0, 1, 0, -1, 0, -1, 0, 1};
// The Jacobi symbol is only defined for odd modulus.
if (!BN_is_odd(b)) {
OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
return -2;
}
// Require b be positive.
if (BN_is_negative(b)) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return -2;
}
int ret = -2;
BN_CTX_start(ctx);
BIGNUM *A = BN_CTX_get(ctx);
BIGNUM *B = BN_CTX_get(ctx);
if (B == NULL) {
goto end;
}
if (!BN_copy(A, a) ||
!BN_copy(B, b)) {
goto end;
}
// Adapted from logic to compute the Kronecker symbol, originally implemented
// according to Henri Cohen, "A Course in Computational Algebraic Number
// Theory" (algorithm 1.4.10).
ret = 1;
while (1) {
// Cohen's step 3:
// B is positive and odd
if (BN_is_zero(A)) {
ret = BN_is_one(B) ? ret : 0;
goto end;
}
// now A is non-zero
int i = 0;
while (!BN_is_bit_set(A, i)) {
i++;
}
if (!BN_rshift(A, A, i)) {
ret = -2;
goto end;
}
if (i & 1) {
// i is odd
// multiply 'ret' by $(-1)^{(B^2-1)/8}$
ret = ret * tab[BN_lsw(B) & 7];
}
// Cohen's step 4:
// multiply 'ret' by $(-1)^{(A-1)(B-1)/4}$
if ((A->neg ? ~BN_lsw(A) : BN_lsw(A)) & BN_lsw(B) & 2) {
ret = -ret;
}
// (A, B) := (B mod |A|, |A|)
if (!BN_nnmod(B, B, A, ctx)) {
ret = -2;
goto end;
}
BIGNUM *tmp = A;
A = B;
B = tmp;
tmp->neg = 0;
}
end:
BN_CTX_end(ctx);
return ret;
}
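// Illustrative sketch, not part of the upstream BoringSSL sources: the helper
// below (a hypothetical name, never called by the library) shows how a caller
// exercises |bn_jacobi| through the public BIGNUM API. For instance,
// 15 == 7 (mod 8) gives (2/15) == 1, while reciprocity gives (7/15) == -1.
static int bn_jacobi_example(void) {
  int ok = 0;
  BN_CTX *ctx = BN_CTX_new();
  BIGNUM *a = BN_new();
  BIGNUM *b = BN_new();
  if (ctx == NULL || a == NULL || b == NULL ||
      !BN_set_word(a, 2) ||
      !BN_set_word(b, 15)) {
    goto done;
  }
  ok = bn_jacobi(a, b, ctx) == 1;       // (2/15) == 1
  if (ok && BN_set_word(a, 7)) {
    ok = bn_jacobi(a, b, ctx) == -1;    // (7/15) == -1
  }
done:
  BN_free(a);
  BN_free(b);
  BN_CTX_free(ctx);
  return ok;
}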

View File

@ -0,0 +1,502 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/
/* ====================================================================
* Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_thread.h>
#include <CBigNumBoringSSL_type_check.h>
#include "internal.h"
#include "../../internal.h"
BN_MONT_CTX *BN_MONT_CTX_new(void) {
BN_MONT_CTX *ret = OPENSSL_malloc(sizeof(BN_MONT_CTX));
if (ret == NULL) {
return NULL;
}
OPENSSL_memset(ret, 0, sizeof(BN_MONT_CTX));
BN_init(&ret->RR);
BN_init(&ret->N);
return ret;
}
void BN_MONT_CTX_free(BN_MONT_CTX *mont) {
if (mont == NULL) {
return;
}
BN_free(&mont->RR);
BN_free(&mont->N);
OPENSSL_free(mont);
}
BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, const BN_MONT_CTX *from) {
if (to == from) {
return to;
}
if (!BN_copy(&to->RR, &from->RR) ||
!BN_copy(&to->N, &from->N)) {
return NULL;
}
to->n0[0] = from->n0[0];
to->n0[1] = from->n0[1];
return to;
}
static int bn_mont_ctx_set_N_and_n0(BN_MONT_CTX *mont, const BIGNUM *mod) {
if (BN_is_zero(mod)) {
OPENSSL_PUT_ERROR(BN, BN_R_DIV_BY_ZERO);
return 0;
}
if (!BN_is_odd(mod)) {
OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
return 0;
}
if (BN_is_negative(mod)) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
// Save the modulus.
if (!BN_copy(&mont->N, mod)) {
OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
return 0;
}
// |mont->N| is always stored minimally. Computing RR efficiently leaks the
// size of the modulus. While the modulus may be private in RSA (one of the
// primes), their sizes are public, so this is fine.
bn_set_minimal_width(&mont->N);
// Find n0 such that n0 * N == -1 (mod r).
//
// Only certain BN_BITS2<=32 platforms actually make use of n0[1]. For the
// others, we could use a shorter R value and use faster |BN_ULONG|-based
// math instead of |uint64_t|-based math, which would be double-precision.
// However, currently only the assembler files know which is which.
OPENSSL_STATIC_ASSERT(BN_MONT_CTX_N0_LIMBS == 1 || BN_MONT_CTX_N0_LIMBS == 2,
"BN_MONT_CTX_N0_LIMBS value is invalid");
OPENSSL_STATIC_ASSERT(
sizeof(BN_ULONG) * BN_MONT_CTX_N0_LIMBS == sizeof(uint64_t),
"uint64_t is insufficient precision for n0");
uint64_t n0 = bn_mont_n0(&mont->N);
mont->n0[0] = (BN_ULONG)n0;
#if BN_MONT_CTX_N0_LIMBS == 2
mont->n0[1] = (BN_ULONG)(n0 >> BN_BITS2);
#else
mont->n0[1] = 0;
#endif
return 1;
}
int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) {
if (!bn_mont_ctx_set_N_and_n0(mont, mod)) {
return 0;
}
BN_CTX *new_ctx = NULL;
if (ctx == NULL) {
new_ctx = BN_CTX_new();
if (new_ctx == NULL) {
return 0;
}
ctx = new_ctx;
}
// Save RR = R**2 (mod N). R is the smallest power of 2**BN_BITS2 such that R
// > mod. Even though the assembly on some 32-bit platforms works with 64-bit
// values, using |BN_BITS2| here, rather than |BN_MONT_CTX_N0_LIMBS *
// BN_BITS2|, is correct because R**2 will still be a multiple of the latter
// as |BN_MONT_CTX_N0_LIMBS| is either one or two.
unsigned lgBigR = mont->N.width * BN_BITS2;
BN_zero(&mont->RR);
int ok = BN_set_bit(&mont->RR, lgBigR * 2) &&
BN_mod(&mont->RR, &mont->RR, &mont->N, ctx) &&
bn_resize_words(&mont->RR, mont->N.width);
BN_CTX_free(new_ctx);
return ok;
}
BN_MONT_CTX *BN_MONT_CTX_new_for_modulus(const BIGNUM *mod, BN_CTX *ctx) {
BN_MONT_CTX *mont = BN_MONT_CTX_new();
if (mont == NULL ||
!BN_MONT_CTX_set(mont, mod, ctx)) {
BN_MONT_CTX_free(mont);
return NULL;
}
return mont;
}
BN_MONT_CTX *BN_MONT_CTX_new_consttime(const BIGNUM *mod, BN_CTX *ctx) {
BN_MONT_CTX *mont = BN_MONT_CTX_new();
if (mont == NULL ||
!bn_mont_ctx_set_N_and_n0(mont, mod)) {
goto err;
}
unsigned lgBigR = mont->N.width * BN_BITS2;
if (!bn_mod_exp_base_2_consttime(&mont->RR, lgBigR * 2, &mont->N, ctx) ||
!bn_resize_words(&mont->RR, mont->N.width)) {
goto err;
}
return mont;
err:
BN_MONT_CTX_free(mont);
return NULL;
}
int BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_MUTEX *lock,
const BIGNUM *mod, BN_CTX *bn_ctx) {
CRYPTO_MUTEX_lock_read(lock);
BN_MONT_CTX *ctx = *pmont;
CRYPTO_MUTEX_unlock_read(lock);
if (ctx) {
return 1;
}
CRYPTO_MUTEX_lock_write(lock);
if (*pmont == NULL) {
*pmont = BN_MONT_CTX_new_for_modulus(mod, bn_ctx);
}
const int ok = *pmont != NULL;
CRYPTO_MUTEX_unlock_write(lock);
return ok;
}
int BN_to_montgomery(BIGNUM *ret, const BIGNUM *a, const BN_MONT_CTX *mont,
BN_CTX *ctx) {
return BN_mod_mul_montgomery(ret, a, &mont->RR, mont, ctx);
}
static int bn_from_montgomery_in_place(BN_ULONG *r, size_t num_r, BN_ULONG *a,
size_t num_a, const BN_MONT_CTX *mont) {
const BN_ULONG *n = mont->N.d;
size_t num_n = mont->N.width;
if (num_r != num_n || num_a != 2 * num_n) {
OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
return 0;
}
// Add multiples of |n| to |r| until R = 2^(nl * BN_BITS2) divides it. On
// input, we had |r| < |n| * R, so now |r| < 2 * |n| * R. Note that |r|
// includes |carry| which is stored separately.
BN_ULONG n0 = mont->n0[0];
BN_ULONG carry = 0;
for (size_t i = 0; i < num_n; i++) {
BN_ULONG v = bn_mul_add_words(a + i, n, num_n, a[i] * n0);
v += carry + a[i + num_n];
carry |= (v != a[i + num_n]);
carry &= (v <= a[i + num_n]);
a[i + num_n] = v;
}
// Shift |num_n| words to divide by R. We have |a| < 2 * |n|. Note that |a|
// includes |carry| which is stored separately.
a += num_n;
// |a| thus requires at most one additional subtraction |n| to be reduced.
bn_reduce_once(r, a, carry, n, num_n);
return 1;
}
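// Worked example (illustrative comment, not from upstream): with hypothetical
// 8-bit words, take n = 239, R = 256 and n0 = -1/239 mod 256 = 241, and reduce
// the two-word value a = 100. The loop computes a[0] * n0 mod 256 =
// 100 * 241 mod 256 = 36 and adds 36 * n = 8604, giving 8704 = 34 * 256, so
// the low word becomes zero. Dropping that word leaves 34, and indeed
// 34 * 256 mod 239 = 100, i.e. the result is a * R^-1 mod n as required.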
static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r,
const BN_MONT_CTX *mont) {
if (r->neg) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
const BIGNUM *n = &mont->N;
if (n->width == 0) {
ret->width = 0;
return 1;
}
int max = 2 * n->width; // carry is stored separately
if (!bn_resize_words(r, max) ||
!bn_wexpand(ret, n->width)) {
return 0;
}
ret->width = n->width;
ret->neg = 0;
return bn_from_montgomery_in_place(ret->d, ret->width, r->d, r->width, mont);
}
int BN_from_montgomery(BIGNUM *r, const BIGNUM *a, const BN_MONT_CTX *mont,
BN_CTX *ctx) {
int ret = 0;
BIGNUM *t;
BN_CTX_start(ctx);
t = BN_CTX_get(ctx);
if (t == NULL ||
!BN_copy(t, a)) {
goto err;
}
ret = BN_from_montgomery_word(r, t, mont);
err:
BN_CTX_end(ctx);
return ret;
}
int bn_one_to_montgomery(BIGNUM *r, const BN_MONT_CTX *mont, BN_CTX *ctx) {
// If the high bit of |n| is set, R = 2^(width*BN_BITS2) < 2 * |n|, so we
// compute R - |n| rather than perform Montgomery reduction.
const BIGNUM *n = &mont->N;
if (n->width > 0 && (n->d[n->width - 1] >> (BN_BITS2 - 1)) != 0) {
if (!bn_wexpand(r, n->width)) {
return 0;
}
r->d[0] = 0 - n->d[0];
for (int i = 1; i < n->width; i++) {
r->d[i] = ~n->d[i];
}
r->width = n->width;
r->neg = 0;
return 1;
}
return BN_from_montgomery(r, &mont->RR, mont, ctx);
}
static int bn_mod_mul_montgomery_fallback(BIGNUM *r, const BIGNUM *a,
const BIGNUM *b,
const BN_MONT_CTX *mont,
BN_CTX *ctx) {
int ret = 0;
BN_CTX_start(ctx);
BIGNUM *tmp = BN_CTX_get(ctx);
if (tmp == NULL) {
goto err;
}
if (a == b) {
if (!bn_sqr_consttime(tmp, a, ctx)) {
goto err;
}
} else {
if (!bn_mul_consttime(tmp, a, b, ctx)) {
goto err;
}
}
// reduce from aRR to aR
if (!BN_from_montgomery_word(r, tmp, mont)) {
goto err;
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BN_MONT_CTX *mont, BN_CTX *ctx) {
if (a->neg || b->neg) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
#if defined(OPENSSL_BN_ASM_MONT)
// |bn_mul_mont| requires at least 128 bits of limbs, at least for x86.
int num = mont->N.width;
if (num >= (128 / BN_BITS2) &&
a->width == num &&
b->width == num) {
if (!bn_wexpand(r, num)) {
return 0;
}
if (!bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
// The check above ensures this won't happen.
assert(0);
OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
return 0;
}
r->neg = 0;
r->width = num;
return 1;
}
#endif
return bn_mod_mul_montgomery_fallback(r, a, b, mont, ctx);
}
int bn_less_than_montgomery_R(const BIGNUM *bn, const BN_MONT_CTX *mont) {
return !BN_is_negative(bn) &&
bn_fits_in_words(bn, mont->N.width);
}
void bn_to_montgomery_small(BN_ULONG *r, const BN_ULONG *a, size_t num,
const BN_MONT_CTX *mont) {
bn_mod_mul_montgomery_small(r, a, mont->RR.d, num, mont);
}
void bn_from_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
size_t num_a, const BN_MONT_CTX *mont) {
if (num_r != (size_t)mont->N.width || num_r > BN_SMALL_MAX_WORDS ||
num_a > 2 * num_r) {
abort();
}
BN_ULONG tmp[BN_SMALL_MAX_WORDS * 2] = {0};
OPENSSL_memcpy(tmp, a, num_a * sizeof(BN_ULONG));
if (!bn_from_montgomery_in_place(r, num_r, tmp, 2 * num_r, mont)) {
abort();
}
OPENSSL_cleanse(tmp, 2 * num_r * sizeof(BN_ULONG));
}
void bn_mod_mul_montgomery_small(BN_ULONG *r, const BN_ULONG *a,
const BN_ULONG *b, size_t num,
const BN_MONT_CTX *mont) {
if (num != (size_t)mont->N.width || num > BN_SMALL_MAX_WORDS) {
abort();
}
#if defined(OPENSSL_BN_ASM_MONT)
// |bn_mul_mont| requires at least 128 bits of limbs, at least for x86.
if (num >= (128 / BN_BITS2)) {
if (!bn_mul_mont(r, a, b, mont->N.d, mont->n0, num)) {
abort(); // The check above ensures this won't happen.
}
return;
}
#endif
// Compute the product.
BN_ULONG tmp[2 * BN_SMALL_MAX_WORDS];
if (a == b) {
bn_sqr_small(tmp, 2 * num, a, num);
} else {
bn_mul_small(tmp, 2 * num, a, num, b, num);
}
// Reduce.
if (!bn_from_montgomery_in_place(r, num, tmp, 2 * num, mont)) {
abort();
}
OPENSSL_cleanse(tmp, 2 * num * sizeof(BN_ULONG));
}
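// Illustrative sketch, not part of the upstream BoringSSL sources: the helper
// below (a hypothetical name, never called by the library) shows the usual
// Montgomery round trip with the functions defined above. It computes
// 7 * 11 mod 23 by converting both factors into the Montgomery domain,
// multiplying there, and converting the product back out.
static int bn_mod_mul_montgomery_example(void) {
  int ok = 0;
  BN_CTX *ctx = BN_CTX_new();
  BIGNUM *a = BN_new();
  BIGNUM *b = BN_new();
  BIGNUM *n = BN_new();
  BIGNUM *r = BN_new();
  BN_MONT_CTX *mont = NULL;
  if (ctx == NULL || a == NULL || b == NULL || n == NULL || r == NULL ||
      !BN_set_word(a, 7) ||
      !BN_set_word(b, 11) ||
      !BN_set_word(n, 23)) {
    goto done;
  }
  mont = BN_MONT_CTX_new_for_modulus(n, ctx);
  if (mont == NULL ||
      !BN_to_montgomery(a, a, mont, ctx) ||           // a -> a*R mod n
      !BN_to_montgomery(b, b, mont, ctx) ||           // b -> b*R mod n
      !BN_mod_mul_montgomery(r, a, b, mont, ctx) ||   // r == a*b*R mod n
      !BN_from_montgomery(r, r, mont, ctx)) {         // r -> a*b mod n
    goto done;
  }
  ok = BN_is_word(r, 8);  // 7 * 11 == 77 == 3 * 23 + 8
done:
  BN_free(a);
  BN_free(b);
  BN_free(n);
  BN_free(r);
  BN_MONT_CTX_free(mont);
  BN_CTX_free(ctx);
  return ok;
}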

View File

@ -0,0 +1,186 @@
/* Copyright 2016 Brian Smith.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include "internal.h"
#include "../../internal.h"
static uint64_t bn_neg_inv_mod_r_u64(uint64_t n);
OPENSSL_STATIC_ASSERT(BN_MONT_CTX_N0_LIMBS == 1 || BN_MONT_CTX_N0_LIMBS == 2,
"BN_MONT_CTX_N0_LIMBS value is invalid");
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) * BN_MONT_CTX_N0_LIMBS ==
sizeof(uint64_t),
"uint64_t is insufficient precision for n0");
// LG_LITTLE_R is log_2(r).
#define LG_LITTLE_R (BN_MONT_CTX_N0_LIMBS * BN_BITS2)
uint64_t bn_mont_n0(const BIGNUM *n) {
// These conditions are checked by the caller, |BN_MONT_CTX_set| or
// |BN_MONT_CTX_new_consttime|.
assert(!BN_is_zero(n));
assert(!BN_is_negative(n));
assert(BN_is_odd(n));
// r == 2**(BN_MONT_CTX_N0_LIMBS * BN_BITS2) and LG_LITTLE_R == lg(r). This
// ensures that we can do integer division by |r| by simply ignoring
// |BN_MONT_CTX_N0_LIMBS| limbs. Similarly, we can calculate values modulo
// |r| by just looking at the lowest |BN_MONT_CTX_N0_LIMBS| limbs. This is
// what makes Montgomery multiplication efficient.
//
// As shown in Algorithm 1 of "Fast Prime Field Elliptic Curve Cryptography
// with 256 Bit Primes" by Shay Gueron and Vlad Krasnov, in the loop of a
// multi-limb Montgomery multiplication of |a * b (mod n)|, given the
// unreduced product |t == a * b|, we repeatedly calculate:
//
// t1 := t % r |t1| is |t|'s lowest limb (see previous paragraph).
// t2 := t1*n0*n
// t3 := t + t2
// t := t3 / r copy all limbs of |t3| except the lowest to |t|.
//
// In the last step, it would only make sense to ignore the lowest limb of
// |t3| if it were zero. The middle steps ensure that this is the case:
//
// t3 == 0 (mod r)
// t + t2 == 0 (mod r)
// t + t1*n0*n == 0 (mod r)
// t1*n0*n == -t (mod r)
// t*n0*n == -t (mod r)
// n0*n == -1 (mod r)
// n0 == -1/n (mod r)
//
// Thus, in each iteration of the loop, we multiply by the constant factor
// |n0|, the negative inverse of n (mod r).
// n_mod_r = n % r. As explained above, this is done by taking the lowest
// |BN_MONT_CTX_N0_LIMBS| limbs of |n|.
uint64_t n_mod_r = n->d[0];
#if BN_MONT_CTX_N0_LIMBS == 2
if (n->width > 1) {
n_mod_r |= (uint64_t)n->d[1] << BN_BITS2;
}
#endif
return bn_neg_inv_mod_r_u64(n_mod_r);
}
// bn_neg_inv_mod_r_u64 calculates -1/n mod r; i.e. it calculates |v|
// such that u*r - v*n == 1. |r| is the constant defined in |bn_mont_n0|. |n|
// must be odd.
//
// This is derived from |xbinGCD| in Henry S. Warren, Jr.'s "Montgomery
// Multiplication" (http://www.hackersdelight.org/MontgomeryMultiplication.pdf).
// It is very similar to the MODULAR-INVERSE function in Stephen R. Dussé's and
// Burton S. Kaliski Jr.'s "A Cryptographic Library for the Motorola DSP56000"
// (http://link.springer.com/chapter/10.1007%2F3-540-46877-3_21).
//
// This is inspired by Joppe W. Bos's "Constant Time Modular Inversion"
// (http://www.joppebos.com/files/CTInversion.pdf) so that the inversion is
// constant-time with respect to |n|. We assume uint64_t additions,
// subtractions, shifts, and bitwise operations are all constant time, which
// may be a large leap of faith on 32-bit targets. We avoid division and
// multiplication, which tend to be the most problematic in terms of timing
// leaks.
//
// Most GCD implementations return values such that |u*r + v*n == 1|, so the
// caller would have to negate the resultant |v| for the purpose of Montgomery
// multiplication. This implementation does the negation implicitly by doing
// the computations as a difference instead of a sum.
static uint64_t bn_neg_inv_mod_r_u64(uint64_t n) {
assert(n % 2 == 1);
// alpha == 2**(lg r - 1) == r / 2.
static const uint64_t alpha = UINT64_C(1) << (LG_LITTLE_R - 1);
const uint64_t beta = n;
uint64_t u = 1;
uint64_t v = 0;
// The invariant maintained from here on is:
// 2**(lg r - i) == u*2*alpha - v*beta.
for (size_t i = 0; i < LG_LITTLE_R; ++i) {
#if BN_BITS2 == 64 && defined(BN_ULLONG)
assert((BN_ULLONG)(1) << (LG_LITTLE_R - i) ==
((BN_ULLONG)u * 2 * alpha) - ((BN_ULLONG)v * beta));
#endif
// Delete a common factor of 2 in u and v if |u| is even. Otherwise, set
// |u = (u + beta) / 2| and |v = (v / 2) + alpha|.
uint64_t u_is_odd = UINT64_C(0) - (u & 1); // Either 0xff..ff or 0.
// The addition can overflow, so use Dietz's method for it.
//
// Dietz calculates (x+y)/2 by (x⊕y)>>1 + x&y. This is valid for all
// (unsigned) x and y, even when x+y overflows. Evidence for 32-bit values
// (embedded in 64 bits so that overflow can be ignored):
//
// (declare-fun x () (_ BitVec 64))
// (declare-fun y () (_ BitVec 64))
// (assert (let (
// (one (_ bv1 64))
// (thirtyTwo (_ bv32 64)))
// (and
// (bvult x (bvshl one thirtyTwo))
// (bvult y (bvshl one thirtyTwo))
// (not (=
// (bvadd (bvlshr (bvxor x y) one) (bvand x y))
// (bvlshr (bvadd x y) one)))
// )))
// (check-sat)
uint64_t beta_if_u_is_odd = beta & u_is_odd; // Either |beta| or 0.
u = ((u ^ beta_if_u_is_odd) >> 1) + (u & beta_if_u_is_odd);
uint64_t alpha_if_u_is_odd = alpha & u_is_odd; // Either |alpha| or 0.
v = (v >> 1) + alpha_if_u_is_odd;
}
// The invariant now shows that u*r - v*n == 1 since r == 2 * alpha.
#if BN_BITS2 == 64 && defined(BN_ULLONG)
assert(1 == ((BN_ULLONG)u * 2 * alpha) - ((BN_ULLONG)v * beta));
#endif
return v;
}
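// Worked example (illustrative comment, not from upstream): for n = 7 the loop
// above yields v = 0x9249249249249249. With r = 2**64 this satisfies
// u*r - v*n == 1 for u = 4, since 7 * 0x9249249249249249 == 4 * 2**64 - 1, so
// v is indeed -1/7 (mod r).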
int bn_mod_exp_base_2_consttime(BIGNUM *r, unsigned p, const BIGNUM *n,
BN_CTX *ctx) {
assert(!BN_is_zero(n));
assert(!BN_is_negative(n));
assert(BN_is_odd(n));
BN_zero(r);
unsigned n_bits = BN_num_bits(n);
assert(n_bits != 0);
assert(p > n_bits);
if (n_bits == 1) {
return 1;
}
// Set |r| to the largest power of two smaller than |n|, then shift with
// reductions the rest of the way.
if (!BN_set_bit(r, n_bits - 1) ||
!bn_mod_lshift_consttime(r, r, p - (n_bits - 1), n, ctx)) {
return 0;
}
return 1;
}

View File

@ -0,0 +1,749 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_type_check.h>
#include "internal.h"
#include "../../internal.h"
#define BN_MUL_RECURSIVE_SIZE_NORMAL 16
#define BN_SQR_RECURSIVE_SIZE_NORMAL BN_MUL_RECURSIVE_SIZE_NORMAL
static void bn_abs_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
size_t num, BN_ULONG *tmp) {
BN_ULONG borrow = bn_sub_words(tmp, a, b, num);
bn_sub_words(r, b, a, num);
bn_select_words(r, 0 - borrow, r /* tmp < 0 */, tmp /* tmp >= 0 */, num);
}
static void bn_mul_normal(BN_ULONG *r, const BN_ULONG *a, size_t na,
const BN_ULONG *b, size_t nb) {
if (na < nb) {
size_t itmp = na;
na = nb;
nb = itmp;
const BN_ULONG *ltmp = a;
a = b;
b = ltmp;
}
BN_ULONG *rr = &(r[na]);
if (nb == 0) {
OPENSSL_memset(r, 0, na * sizeof(BN_ULONG));
return;
}
rr[0] = bn_mul_words(r, a, na, b[0]);
for (;;) {
if (--nb == 0) {
return;
}
rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
if (--nb == 0) {
return;
}
rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
if (--nb == 0) {
return;
}
rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
if (--nb == 0) {
return;
}
rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
rr += 4;
r += 4;
b += 4;
}
}
// bn_sub_part_words sets |r| to |a| - |b|. It returns the borrow bit, which is
// one if the operation underflowed and zero otherwise. |cl| is the common
// length, that is, the shorter of len(a) or len(b). |dl| is the delta length,
// that is, len(a) - len(b). |r|'s length matches the larger of |a| and |b|, or
// cl + abs(dl).
//
// TODO(davidben): Make this take |size_t|. The |cl| + |dl| calling convention
// is confusing.
static BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a,
const BN_ULONG *b, int cl, int dl) {
assert(cl >= 0);
BN_ULONG borrow = bn_sub_words(r, a, b, cl);
if (dl == 0) {
return borrow;
}
r += cl;
a += cl;
b += cl;
if (dl < 0) {
// |a| is shorter than |b|. Complete the subtraction as if the excess words
// in |a| were zeros.
dl = -dl;
for (int i = 0; i < dl; i++) {
r[i] = 0u - b[i] - borrow;
borrow |= r[i] != 0;
}
} else {
// |b| is shorter than |a|. Complete the subtraction as if the excess words
// in |b| were zeros.
for (int i = 0; i < dl; i++) {
// |r| and |a| may alias, so use a temporary.
BN_ULONG tmp = a[i];
r[i] = a[i] - borrow;
borrow = tmp < r[i];
}
}
return borrow;
}
// bn_abs_sub_part_words computes |r| = |a| - |b|, storing the absolute value
// and returning a mask of all ones if the result was negative and all zeros if
// the result was positive. |cl| and |dl| follow the |bn_sub_part_words| calling
// convention.
//
// TODO(davidben): Make this take |size_t|. The |cl| + |dl| calling convention
// is confusing.
static BN_ULONG bn_abs_sub_part_words(BN_ULONG *r, const BN_ULONG *a,
const BN_ULONG *b, int cl, int dl,
BN_ULONG *tmp) {
BN_ULONG borrow = bn_sub_part_words(tmp, a, b, cl, dl);
bn_sub_part_words(r, b, a, cl, -dl);
int r_len = cl + (dl < 0 ? -dl : dl);
borrow = 0 - borrow;
bn_select_words(r, borrow, r /* tmp < 0 */, tmp /* tmp >= 0 */, r_len);
return borrow;
}
int bn_abs_sub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
BN_CTX *ctx) {
int cl = a->width < b->width ? a->width : b->width;
int dl = a->width - b->width;
int r_len = a->width < b->width ? b->width : a->width;
BN_CTX_start(ctx);
BIGNUM *tmp = BN_CTX_get(ctx);
int ok = tmp != NULL &&
bn_wexpand(r, r_len) &&
bn_wexpand(tmp, r_len);
if (ok) {
bn_abs_sub_part_words(r->d, a->d, b->d, cl, dl, tmp->d);
r->width = r_len;
}
BN_CTX_end(ctx);
return ok;
}
// Karatsuba recursive multiplication algorithm
// (cf. Knuth, The Art of Computer Programming, Vol. 2)
// bn_mul_recursive sets |r| to |a| * |b|, using |t| as scratch space. |r| has
// length 2*|n2|, |a| has length |n2| + |dna|, |b| has length |n2| + |dnb|, and
// |t| has length 4*|n2|. |n2| must be a power of two. Finally, we must have
// -|BN_MUL_RECURSIVE_SIZE_NORMAL|/2 <= |dna| <= 0 and
// -|BN_MUL_RECURSIVE_SIZE_NORMAL|/2 <= |dnb| <= 0.
//
// TODO(davidben): Simplify and |size_t| the calling convention around lengths
// here.
static void bn_mul_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
int n2, int dna, int dnb, BN_ULONG *t) {
// |n2| is a power of two.
assert(n2 != 0 && (n2 & (n2 - 1)) == 0);
// Check |dna| and |dnb| are in range.
assert(-BN_MUL_RECURSIVE_SIZE_NORMAL/2 <= dna && dna <= 0);
assert(-BN_MUL_RECURSIVE_SIZE_NORMAL/2 <= dnb && dnb <= 0);
// Only call bn_mul_comba 8 if n2 == 8 and the
// two arrays are complete [steve]
if (n2 == 8 && dna == 0 && dnb == 0) {
bn_mul_comba8(r, a, b);
return;
}
// Else do normal multiply
if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
if (dna + dnb < 0) {
OPENSSL_memset(&r[2 * n2 + dna + dnb], 0,
sizeof(BN_ULONG) * -(dna + dnb));
}
return;
}
// Split |a| and |b| into a0,a1 and b0,b1, where a0 and b0 have size |n|.
// Split |t| into t0,t1,t2,t3, each of size |n|, with the remaining 4*|n| used
// for recursive calls.
// Split |r| into r0,r1,r2,r3. We must contribute a0*b0 to r0,r1, a0*b1+a1*b0
// to r1,r2, and a1*b1 to r2,r3. The middle term we will compute as:
//
//   a0*b1 + a1*b0 = (a0 - a1)*(b1 - b0) + a1*b1 + a0*b0
//
// Note that we know |n| >= |BN_MUL_RECURSIVE_SIZE_NORMAL|/2 above, so
// |tna| and |tnb| are non-negative.
int n = n2 / 2, tna = n + dna, tnb = n + dnb;
// t0 = a0 - a1 and t1 = b1 - b0. The result will be multiplied, so we XOR
// their sign masks, giving the sign of (a0 - a1)*(b1 - b0). t0 and t1
// themselves store the absolute value.
BN_ULONG neg = bn_abs_sub_part_words(t, a, &a[n], tna, n - tna, &t[n2]);
neg ^= bn_abs_sub_part_words(&t[n], &b[n], b, tnb, tnb - n, &t[n2]);
// Compute:
// t2,t3 = t0 * t1 = |(a0 - a1)*(b1 - b0)|
// r0,r1 = a0 * b0
// r2,r3 = a1 * b1
if (n == 4 && dna == 0 && dnb == 0) {
bn_mul_comba4(&t[n2], t, &t[n]);
bn_mul_comba4(r, a, b);
bn_mul_comba4(&r[n2], &a[n], &b[n]);
} else if (n == 8 && dna == 0 && dnb == 0) {
bn_mul_comba8(&t[n2], t, &t[n]);
bn_mul_comba8(r, a, b);
bn_mul_comba8(&r[n2], &a[n], &b[n]);
} else {
BN_ULONG *p = &t[n2 * 2];
bn_mul_recursive(&t[n2], t, &t[n], n, 0, 0, p);
bn_mul_recursive(r, a, b, n, 0, 0, p);
bn_mul_recursive(&r[n2], &a[n], &b[n], n, dna, dnb, p);
}
// t0,t1,c = r0,r1 + r2,r3 = a0*b0 + a1*b1
BN_ULONG c = bn_add_words(t, r, &r[n2], n2);
// t2,t3,c = t0,t1,c + neg*t2,t3 = (a0 - a1)*(b1 - b0) + a1*b1 + a0*b0.
// The second term is stored as the absolute value, so we do this with a
// constant-time select.
BN_ULONG c_neg = c - bn_sub_words(&t[n2 * 2], t, &t[n2], n2);
BN_ULONG c_pos = c + bn_add_words(&t[n2], t, &t[n2], n2);
bn_select_words(&t[n2], neg, &t[n2 * 2], &t[n2], n2);
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
c = constant_time_select_w(neg, c_neg, c_pos);
// We now have our three components. Add them together.
// r1,r2,c = r1,r2 + t2,t3,c
c += bn_add_words(&r[n], &r[n], &t[n2], n2);
// Propagate the carry bit to the end.
for (int i = n + n2; i < n2 + n2; i++) {
BN_ULONG old = r[i];
r[i] = old + c;
c = r[i] < old;
}
// The product should fit without carries.
assert(c == 0);
}
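/*
 * The middle-term identity above can be checked in isolation. A minimal,
 * self-contained sketch with two 32-bit halves per operand (not part of
 * BoringSSL; assumes a compiler with unsigned __int128, e.g. GCC or Clang):
 *
 *   a = a1*2^32 + a0,  b = b1*2^32 + b0
 *   a*b = a1*b1*2^64 + (a0*b0 + a1*b1 + (a0 - a1)*(b1 - b0))*2^32 + a0*b0
 */
#include <assert.h>
#include <stdint.h>

static unsigned __int128 karatsuba64_sketch(uint64_t a, uint64_t b) {
  uint64_t a0 = (uint32_t)a, a1 = a >> 32;
  uint64_t b0 = (uint32_t)b, b1 = b >> 32;
  uint64_t lo = a0 * b0;  // a0*b0 fits in 64 bits
  uint64_t hi = a1 * b1;  // a1*b1 fits in 64 bits
  // The factor (a0 - a1)*(b1 - b0) may be negative, so compute it signed;
  // the full middle term a0*b1 + a1*b0 is always non-negative.
  __int128 mid = (__int128)((int64_t)a0 - (int64_t)a1) *
                 (__int128)((int64_t)b1 - (int64_t)b0);
  unsigned __int128 middle = (unsigned __int128)((__int128)lo + hi + mid);
  return ((unsigned __int128)hi << 64) + (middle << 32) + lo;
}

static void karatsuba64_sketch_check(void) {
  uint64_t a = 0x123456789abcdef0u, b = 0x0fedcba987654321u;
  assert(karatsuba64_sketch(a, b) == (unsigned __int128)a * b);
}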
// bn_mul_part_recursive sets |r| to |a| * |b|, using |t| as scratch space. |r|
// has length 4*|n|, |a| has length |n| + |tna|, |b| has length |n| + |tnb|, and
// |t| has length 8*|n|. |n| must be a power of two. Additionally, we must have
// 0 <= tna < n and 0 <= tnb < n, and |tna| and |tnb| must differ by at most
// one.
//
// TODO(davidben): Make this take |size_t| and perhaps the actual lengths of |a|
// and |b|.
static void bn_mul_part_recursive(BN_ULONG *r, const BN_ULONG *a,
const BN_ULONG *b, int n, int tna, int tnb,
BN_ULONG *t) {
// |n| is a power of two.
assert(n != 0 && (n & (n - 1)) == 0);
// Check |tna| and |tnb| are in range.
assert(0 <= tna && tna < n);
assert(0 <= tnb && tnb < n);
assert(-1 <= tna - tnb && tna - tnb <= 1);
int n2 = n * 2;
if (n < 8) {
bn_mul_normal(r, a, n + tna, b, n + tnb);
    OPENSSL_memset(r + n2 + tna + tnb, 0, sizeof(BN_ULONG) * (n2 - tna - tnb));
return;
}
// Split |a| and |b| into a0,a1 and b0,b1, where a0 and b0 have size |n|. |a1|
// and |b1| have size |tna| and |tnb|, respectively.
// Split |t| into t0,t1,t2,t3, each of size |n|, with the remaining 4*|n| used
// for recursive calls.
  // Split |r| into r0,r1,r2,r3. We must contribute a0*b0 to r0,r1, a0*b1+a1*b0
  // to r1,r2, and a1*b1 to r2,r3. The middle term we will compute as:
  //
  //   a0*b1 + a1*b0 = (a0 - a1)*(b1 - b0) + a1*b1 + a0*b0
// t0 = a0 - a1 and t1 = b1 - b0. The result will be multiplied, so we XOR
// their sign masks, giving the sign of (a0 - a1)*(b1 - b0). t0 and t1
// themselves store the absolute value.
BN_ULONG neg = bn_abs_sub_part_words(t, a, &a[n], tna, n - tna, &t[n2]);
neg ^= bn_abs_sub_part_words(&t[n], &b[n], b, tnb, tnb - n, &t[n2]);
// Compute:
// t2,t3 = t0 * t1 = |(a0 - a1)*(b1 - b0)|
// r0,r1 = a0 * b0
// r2,r3 = a1 * b1
if (n == 8) {
bn_mul_comba8(&t[n2], t, &t[n]);
bn_mul_comba8(r, a, b);
bn_mul_normal(&r[n2], &a[n], tna, &b[n], tnb);
    // |bn_mul_normal| only writes |tna| + |tnb| words. Zero the rest.
OPENSSL_memset(&r[n2 + tna + tnb], 0, sizeof(BN_ULONG) * (n2 - tna - tnb));
} else {
BN_ULONG *p = &t[n2 * 2];
bn_mul_recursive(&t[n2], t, &t[n], n, 0, 0, p);
bn_mul_recursive(r, a, b, n, 0, 0, p);
OPENSSL_memset(&r[n2], 0, sizeof(BN_ULONG) * n2);
if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
bn_mul_normal(&r[n2], &a[n], tna, &b[n], tnb);
} else {
int i = n;
for (;;) {
i /= 2;
if (i < tna || i < tnb) {
// E.g., n == 16, i == 8 and tna == 11. |tna| and |tnb| are within one
// of each other, so if |tna| is larger and tna > i, then we know
// tnb >= i, and this call is valid.
bn_mul_part_recursive(&r[n2], &a[n], &b[n], i, tna - i, tnb - i, p);
break;
}
if (i == tna || i == tnb) {
// If there is only a bottom half to the number, just do it. We know
// the larger of |tna - i| and |tnb - i| is zero. The other is zero or
          // -1 because |tna| and |tnb| differ by at most one.
bn_mul_recursive(&r[n2], &a[n], &b[n], i, tna - i, tnb - i, p);
break;
}
// This loop will eventually terminate when |i| falls below
// |BN_MUL_RECURSIVE_SIZE_NORMAL| because we know one of |tna| and |tnb|
// exceeds that.
}
}
}
// t0,t1,c = r0,r1 + r2,r3 = a0*b0 + a1*b1
BN_ULONG c = bn_add_words(t, r, &r[n2], n2);
// t2,t3,c = t0,t1,c + neg*t2,t3 = (a0 - a1)*(b1 - b0) + a1*b1 + a0*b0.
// The second term is stored as the absolute value, so we do this with a
// constant-time select.
BN_ULONG c_neg = c - bn_sub_words(&t[n2 * 2], t, &t[n2], n2);
BN_ULONG c_pos = c + bn_add_words(&t[n2], t, &t[n2], n2);
bn_select_words(&t[n2], neg, &t[n2 * 2], &t[n2], n2);
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
c = constant_time_select_w(neg, c_neg, c_pos);
// We now have our three components. Add them together.
// r1,r2,c = r1,r2 + t2,t3,c
c += bn_add_words(&r[n], &r[n], &t[n2], n2);
// Propagate the carry bit to the end.
for (int i = n + n2; i < n2 + n2; i++) {
BN_ULONG old = r[i];
r[i] = old + c;
c = r[i] < old;
}
// The product should fit without carries.
assert(c == 0);
}
// bn_mul_impl implements |BN_mul| and |bn_mul_consttime|. Note this function
// breaks |BIGNUM| invariants and may return a negative zero. This is handled by
// the callers.
static int bn_mul_impl(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
BN_CTX *ctx) {
int al = a->width;
int bl = b->width;
if (al == 0 || bl == 0) {
BN_zero(r);
return 1;
}
int ret = 0;
BIGNUM *rr;
BN_CTX_start(ctx);
if (r == a || r == b) {
rr = BN_CTX_get(ctx);
if (rr == NULL) {
goto err;
}
} else {
rr = r;
}
rr->neg = a->neg ^ b->neg;
int i = al - bl;
if (i == 0) {
if (al == 8) {
if (!bn_wexpand(rr, 16)) {
goto err;
}
rr->width = 16;
bn_mul_comba8(rr->d, a->d, b->d);
goto end;
}
}
int top = al + bl;
static const int kMulNormalSize = 16;
if (al >= kMulNormalSize && bl >= kMulNormalSize) {
if (-1 <= i && i <= 1) {
// Find the largest power of two less than or equal to the larger length.
int j;
if (i >= 0) {
j = BN_num_bits_word((BN_ULONG)al);
} else {
j = BN_num_bits_word((BN_ULONG)bl);
}
j = 1 << (j - 1);
assert(j <= al || j <= bl);
BIGNUM *t = BN_CTX_get(ctx);
if (t == NULL) {
goto err;
}
if (al > j || bl > j) {
// We know |al| and |bl| are at most one from each other, so if al > j,
// bl >= j, and vice versa. Thus we can use |bn_mul_part_recursive|.
//
// TODO(davidben): This codepath is almost unused in standard
// algorithms. Is this optimization necessary? See notes in
// https://boringssl-review.googlesource.com/q/I0bd604e2cd6a75c266f64476c23a730ca1721ea6
assert(al >= j && bl >= j);
if (!bn_wexpand(t, j * 8) ||
!bn_wexpand(rr, j * 4)) {
goto err;
}
bn_mul_part_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
} else {
// al <= j && bl <= j. Additionally, we know j <= al or j <= bl, so one
// of al - j or bl - j is zero. The other, by the bound on |i| above, is
// zero or -1. Thus, we can use |bn_mul_recursive|.
if (!bn_wexpand(t, j * 4) ||
!bn_wexpand(rr, j * 2)) {
goto err;
}
bn_mul_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
}
rr->width = top;
goto end;
}
}
if (!bn_wexpand(rr, top)) {
goto err;
}
rr->width = top;
bn_mul_normal(rr->d, a->d, al, b->d, bl);
end:
if (r != rr && !BN_copy(r, rr)) {
goto err;
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
if (!bn_mul_impl(r, a, b, ctx)) {
return 0;
}
// This additionally fixes any negative zeros created by |bn_mul_impl|.
bn_set_minimal_width(r);
return 1;
}
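/*
 * Usage sketch for the public entry point above, using the prefixed headers
 * this package vendors (illustrative only; error handling abbreviated and
 * values chosen arbitrarily):
 */
#include <CBigNumBoringSSL_bn.h>
#include <CBigNumBoringSSL_mem.h>

static void bn_mul_usage_sketch(void) {
  BIGNUM *a = BN_new(), *b = BN_new(), *r = BN_new();
  BN_CTX *ctx = BN_CTX_new();
  BN_dec2bn(&a, "123456789012345678901234567890");
  BN_dec2bn(&b, "987654321098765432109876543210");
  if (BN_mul(r, a, b, ctx)) {
    char *s = BN_bn2dec(r);  // decimal string of the product
    OPENSSL_free(s);
  }
  BN_free(a);
  BN_free(b);
  BN_free(r);
  BN_CTX_free(ctx);
}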
int bn_mul_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
// Prevent negative zeros.
if (a->neg || b->neg) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
return bn_mul_impl(r, a, b, ctx);
}
void bn_mul_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a,
const BN_ULONG *b, size_t num_b) {
if (num_r != num_a + num_b) {
abort();
}
// TODO(davidben): Should this call |bn_mul_comba4| too? |BN_mul| does not
// hit that code.
if (num_a == 8 && num_b == 8) {
bn_mul_comba8(r, a, b);
} else {
bn_mul_normal(r, a, num_a, b, num_b);
}
}
// tmp must have 2*n words
static void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, size_t n,
BN_ULONG *tmp) {
if (n == 0) {
return;
}
size_t max = n * 2;
const BN_ULONG *ap = a;
BN_ULONG *rp = r;
rp[0] = rp[max - 1] = 0;
rp++;
// Compute the contribution of a[i] * a[j] for all i < j.
if (n > 1) {
ap++;
rp[n - 1] = bn_mul_words(rp, ap, n - 1, ap[-1]);
rp += 2;
}
if (n > 2) {
for (size_t i = n - 2; i > 0; i--) {
ap++;
rp[i] = bn_mul_add_words(rp, ap, i, ap[-1]);
rp += 2;
}
}
// The final result fits in |max| words, so none of the following operations
// will overflow.
// Double |r|, giving the contribution of a[i] * a[j] for all i != j.
bn_add_words(r, r, r, max);
// Add in the contribution of a[i] * a[i] for all i.
bn_sqr_words(tmp, a, n);
bn_add_words(r, r, tmp, max);
}
// bn_sqr_recursive sets |r| to |a|^2, using |t| as scratch space. |r| has
// length 2*|n2|, |a| has length |n2|, and |t| has length 4*|n2|. |n2| must be
// a power of two.
static void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, size_t n2,
BN_ULONG *t) {
// |n2| is a power of two.
assert(n2 != 0 && (n2 & (n2 - 1)) == 0);
if (n2 == 4) {
bn_sqr_comba4(r, a);
return;
}
if (n2 == 8) {
bn_sqr_comba8(r, a);
return;
}
if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL) {
bn_sqr_normal(r, a, n2, t);
return;
}
// Split |a| into a0,a1, each of size |n|.
// Split |t| into t0,t1,t2,t3, each of size |n|, with the remaining 4*|n| used
// for recursive calls.
// Split |r| into r0,r1,r2,r3. We must contribute a0^2 to r0,r1, 2*a0*a1 to
// r1,r2, and a1^2 to r2,r3.
size_t n = n2 / 2;
BN_ULONG *t_recursive = &t[n2 * 2];
// t0 = |a0 - a1|.
bn_abs_sub_words(t, a, &a[n], n, &t[n]);
// t2,t3 = t0^2 = |a0 - a1|^2 = a0^2 - 2*a0*a1 + a1^2
bn_sqr_recursive(&t[n2], t, n, t_recursive);
// r0,r1 = a0^2
bn_sqr_recursive(r, a, n, t_recursive);
// r2,r3 = a1^2
bn_sqr_recursive(&r[n2], &a[n], n, t_recursive);
// t0,t1,c = r0,r1 + r2,r3 = a0^2 + a1^2
BN_ULONG c = bn_add_words(t, r, &r[n2], n2);
// t2,t3,c = t0,t1,c - t2,t3 = 2*a0*a1
c -= bn_sub_words(&t[n2], t, &t[n2], n2);
// We now have our three components. Add them together.
// r1,r2,c = r1,r2 + t2,t3,c
c += bn_add_words(&r[n], &r[n], &t[n2], n2);
// Propagate the carry bit to the end.
for (size_t i = n + n2; i < n2 + n2; i++) {
BN_ULONG old = r[i];
r[i] = old + c;
c = r[i] < old;
}
// The square should fit without carries.
assert(c == 0);
}
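/*
 * The squaring identity used above can likewise be checked in isolation. A
 * minimal sketch with 32-bit halves of a 64-bit input (not part of
 * BoringSSL; assumes a compiler with unsigned __int128, e.g. GCC or Clang):
 *
 *   a = a1*2^32 + a0
 *   a^2 = a1^2*2^64 + (a0^2 + a1^2 - (a0 - a1)^2)*2^32 + a0^2
 */
#include <assert.h>
#include <stdint.h>

static unsigned __int128 sqr64_sketch(uint64_t a) {
  uint64_t a0 = (uint32_t)a, a1 = a >> 32;
  uint64_t lo = a0 * a0;
  uint64_t hi = a1 * a1;
  uint64_t d = a0 > a1 ? a0 - a1 : a1 - a0;  // |a0 - a1|
  // 2*a0*a1 = a0^2 + a1^2 - (a0 - a1)^2, which is always non-negative.
  unsigned __int128 middle =
      (unsigned __int128)lo + hi - (unsigned __int128)d * d;
  return ((unsigned __int128)hi << 64) + (middle << 32) + lo;
}

static void sqr64_sketch_check(void) {
  uint64_t a = 0xfedcba9876543210u;
  assert(sqr64_sketch(a) == (unsigned __int128)a * a);
}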
int BN_mul_word(BIGNUM *bn, BN_ULONG w) {
if (!bn->width) {
return 1;
}
if (w == 0) {
BN_zero(bn);
return 1;
}
BN_ULONG ll = bn_mul_words(bn->d, bn->d, bn->width, w);
if (ll) {
if (!bn_wexpand(bn, bn->width + 1)) {
return 0;
}
bn->d[bn->width++] = ll;
}
return 1;
}
int bn_sqr_consttime(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) {
int al = a->width;
if (al <= 0) {
r->width = 0;
r->neg = 0;
return 1;
}
int ret = 0;
BN_CTX_start(ctx);
BIGNUM *rr = (a != r) ? r : BN_CTX_get(ctx);
BIGNUM *tmp = BN_CTX_get(ctx);
if (!rr || !tmp) {
goto err;
}
int max = 2 * al; // Non-zero (from above)
if (!bn_wexpand(rr, max)) {
goto err;
}
if (al == 4) {
bn_sqr_comba4(rr->d, a->d);
} else if (al == 8) {
bn_sqr_comba8(rr->d, a->d);
} else {
if (al < BN_SQR_RECURSIVE_SIZE_NORMAL) {
BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL * 2];
bn_sqr_normal(rr->d, a->d, al, t);
} else {
// If |al| is a power of two, we can use |bn_sqr_recursive|.
if (al != 0 && (al & (al - 1)) == 0) {
if (!bn_wexpand(tmp, al * 4)) {
goto err;
}
bn_sqr_recursive(rr->d, a->d, al, tmp->d);
} else {
if (!bn_wexpand(tmp, max)) {
goto err;
}
bn_sqr_normal(rr->d, a->d, al, tmp->d);
}
}
}
rr->neg = 0;
rr->width = max;
if (rr != r && !BN_copy(r, rr)) {
goto err;
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) {
if (!bn_sqr_consttime(r, a, ctx)) {
return 0;
}
bn_set_minimal_width(r);
return 1;
}
void bn_sqr_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a) {
if (num_r != 2 * num_a || num_a > BN_SMALL_MAX_WORDS) {
abort();
}
if (num_a == 4) {
bn_sqr_comba4(r, a);
} else if (num_a == 8) {
bn_sqr_comba8(r, a);
} else {
BN_ULONG tmp[2 * BN_SMALL_MAX_WORDS];
bn_sqr_normal(r, a, num_a, tmp);
OPENSSL_cleanse(tmp, 2 * num_a * sizeof(BN_ULONG));
}
}

File diff suppressed because it is too large

View File

@ -0,0 +1,341 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/
/* ====================================================================
* Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_bn.h>
#include <limits.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_rand.h>
#include <CBigNumBoringSSL_type_check.h>
#include "internal.h"
#include "../../internal.h"
#include "../rand/internal.h"
int BN_rand(BIGNUM *rnd, int bits, int top, int bottom) {
if (rnd == NULL) {
return 0;
}
if (top != BN_RAND_TOP_ANY && top != BN_RAND_TOP_ONE &&
top != BN_RAND_TOP_TWO) {
OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
return 0;
}
if (bottom != BN_RAND_BOTTOM_ANY && bottom != BN_RAND_BOTTOM_ODD) {
OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
return 0;
}
if (bits == 0) {
BN_zero(rnd);
return 1;
}
if (bits > INT_MAX - (BN_BITS2 - 1)) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
return 0;
}
int words = (bits + BN_BITS2 - 1) / BN_BITS2;
int bit = (bits - 1) % BN_BITS2;
const BN_ULONG kOne = 1;
const BN_ULONG kThree = 3;
BN_ULONG mask = bit < BN_BITS2 - 1 ? (kOne << (bit + 1)) - 1 : BN_MASK2;
if (!bn_wexpand(rnd, words)) {
return 0;
}
RAND_bytes((uint8_t *)rnd->d, words * sizeof(BN_ULONG));
rnd->d[words - 1] &= mask;
if (top != BN_RAND_TOP_ANY) {
if (top == BN_RAND_TOP_TWO && bits > 1) {
if (bit == 0) {
rnd->d[words - 1] |= 1;
rnd->d[words - 2] |= kOne << (BN_BITS2 - 1);
} else {
rnd->d[words - 1] |= kThree << (bit - 1);
}
} else {
rnd->d[words - 1] |= kOne << bit;
}
}
if (bottom == BN_RAND_BOTTOM_ODD) {
rnd->d[0] |= 1;
}
rnd->neg = 0;
rnd->width = words;
return 1;
}
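/*
 * Usage sketch: a 256-bit random value with the top two bits set and the low
 * bit forced odd, the typical shape of a prime candidate (illustrative only;
 * uses the prefixed header this package vendors):
 */
#include <CBigNumBoringSSL_bn.h>

static int bn_rand_usage_sketch(BIGNUM *out) {
  return BN_rand(out, 256, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ODD);
}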
int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom) {
return BN_rand(rnd, bits, top, bottom);
}
// bn_less_than_word_mask returns a mask of all ones if the number represented
// by |len| words at |a| is less than |b| and zero otherwise. It performs this
// computation in time independent of the value of |a|. |b| is assumed public.
static crypto_word_t bn_less_than_word_mask(const BN_ULONG *a, size_t len,
BN_ULONG b) {
if (b == 0) {
return CONSTTIME_FALSE_W;
}
if (len == 0) {
return CONSTTIME_TRUE_W;
}
// |a| < |b| iff a[1..len-1] are all zero and a[0] < b.
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
crypto_word_t mask = 0;
for (size_t i = 1; i < len; i++) {
mask |= a[i];
}
// |mask| is now zero iff a[1..len-1] are all zero.
mask = constant_time_is_zero_w(mask);
mask &= constant_time_lt_w(a[0], b);
return mask;
}
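/*
 * The |constant_time_lt_w| building block used above reduces to a well-known
 * branch-free formula: the borrow out of a - b is 1 exactly when a < b, and it
 * can be recovered from the sign bits without branching and then smeared into
 * an all-ones mask. A standalone 64-bit sketch (illustrative only, not the
 * BoringSSL helper itself):
 */
#include <stdint.h>

static uint64_t ct_lt_mask_sketch(uint64_t a, uint64_t b) {
  // All ones if a < b, zero otherwise.
  return 0 - ((a ^ ((a ^ b) | ((a - b) ^ b))) >> 63);
}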
int bn_in_range_words(const BN_ULONG *a, BN_ULONG min_inclusive,
const BN_ULONG *max_exclusive, size_t len) {
crypto_word_t mask = ~bn_less_than_word_mask(a, len, min_inclusive);
return mask & bn_less_than_words(a, max_exclusive, len);
}
static int bn_range_to_mask(size_t *out_words, BN_ULONG *out_mask,
size_t min_inclusive, const BN_ULONG *max_exclusive,
size_t len) {
// The magnitude of |max_exclusive| is assumed public.
size_t words = len;
while (words > 0 && max_exclusive[words - 1] == 0) {
words--;
}
if (words == 0 ||
(words == 1 && max_exclusive[0] <= min_inclusive)) {
OPENSSL_PUT_ERROR(BN, BN_R_INVALID_RANGE);
return 0;
}
BN_ULONG mask = max_exclusive[words - 1];
// This sets all bits in |mask| below the most significant bit.
mask |= mask >> 1;
mask |= mask >> 2;
mask |= mask >> 4;
mask |= mask >> 8;
mask |= mask >> 16;
#if defined(OPENSSL_64_BIT)
mask |= mask >> 32;
#endif
*out_words = words;
*out_mask = mask;
return 1;
}
int bn_rand_range_words(BN_ULONG *out, BN_ULONG min_inclusive,
const BN_ULONG *max_exclusive, size_t len,
const uint8_t additional_data[32]) {
// This function implements the equivalent of steps 4 through 7 of FIPS 186-4
// appendices B.4.2 and B.5.2. When called in those contexts, |max_exclusive|
// is n and |min_inclusive| is one.
// Compute the bit length of |max_exclusive| (step 1), in terms of a number of
// |words| worth of entropy to fill and a mask of bits to clear in the top
// word.
size_t words;
BN_ULONG mask;
if (!bn_range_to_mask(&words, &mask, min_inclusive, max_exclusive, len)) {
return 0;
}
// Fill any unused words with zero.
OPENSSL_memset(out + words, 0, (len - words) * sizeof(BN_ULONG));
unsigned count = 100;
do {
if (!--count) {
OPENSSL_PUT_ERROR(BN, BN_R_TOO_MANY_ITERATIONS);
return 0;
}
// Steps 4 and 5. Use |words| and |mask| together to obtain a string of N
// bits, where N is the bit length of |max_exclusive|.
RAND_bytes_with_additional_data((uint8_t *)out, words * sizeof(BN_ULONG),
additional_data);
out[words - 1] &= mask;
// If out >= max_exclusive or out < min_inclusive, retry. This implements
// the equivalent of steps 6 and 7 without leaking the value of |out|.
} while (!bn_in_range_words(out, min_inclusive, max_exclusive, words));
return 1;
}
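/*
 * The loop above is rejection sampling: draw a value masked to the bit length
 * of the upper bound and retry until it lands in range. A scalar sketch of
 * the same idea for 64-bit bounds (illustrative only; |rand64| is a
 * hypothetical uniform source, not a BoringSSL function, and the caller is
 * assumed to pass min_inclusive < max_exclusive with max_exclusive > 0):
 */
#include <stdint.h>

static uint64_t rand_range_sketch(uint64_t min_inclusive,
                                  uint64_t max_exclusive,
                                  uint64_t (*rand64)(void)) {
  // Mask covering every bit position used by max_exclusive - 1, mirroring
  // |bn_range_to_mask| above.
  uint64_t mask = max_exclusive - 1;
  mask |= mask >> 1;
  mask |= mask >> 2;
  mask |= mask >> 4;
  mask |= mask >> 8;
  mask |= mask >> 16;
  mask |= mask >> 32;
  uint64_t v;
  do {
    v = rand64() & mask;
  } while (v < min_inclusive || v >= max_exclusive);
  return v;
}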
int BN_rand_range_ex(BIGNUM *r, BN_ULONG min_inclusive,
const BIGNUM *max_exclusive) {
static const uint8_t kDefaultAdditionalData[32] = {0};
if (!bn_wexpand(r, max_exclusive->width) ||
!bn_rand_range_words(r->d, min_inclusive, max_exclusive->d,
max_exclusive->width, kDefaultAdditionalData)) {
return 0;
}
r->neg = 0;
r->width = max_exclusive->width;
return 1;
}
int bn_rand_secret_range(BIGNUM *r, int *out_is_uniform, BN_ULONG min_inclusive,
const BIGNUM *max_exclusive) {
size_t words;
BN_ULONG mask;
if (!bn_range_to_mask(&words, &mask, min_inclusive, max_exclusive->d,
max_exclusive->width) ||
!bn_wexpand(r, words)) {
return 0;
}
assert(words > 0);
assert(mask != 0);
// The range must be large enough for bit tricks to fix invalid values.
if (words == 1 && min_inclusive > mask >> 1) {
OPENSSL_PUT_ERROR(BN, BN_R_INVALID_RANGE);
return 0;
}
// Select a uniform random number with num_bits(max_exclusive) bits.
RAND_bytes((uint8_t *)r->d, words * sizeof(BN_ULONG));
r->d[words - 1] &= mask;
// Check, in constant-time, if the value is in range.
*out_is_uniform =
bn_in_range_words(r->d, min_inclusive, max_exclusive->d, words);
crypto_word_t in_range = *out_is_uniform;
in_range = 0 - in_range;
// If the value is not in range, force it to be in range.
r->d[0] |= constant_time_select_w(in_range, 0, min_inclusive);
r->d[words - 1] &= constant_time_select_w(in_range, BN_MASK2, mask >> 1);
assert(bn_in_range_words(r->d, min_inclusive, max_exclusive->d, words));
r->neg = 0;
r->width = words;
return 1;
}
int BN_rand_range(BIGNUM *r, const BIGNUM *range) {
return BN_rand_range_ex(r, 0, range);
}
int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range) {
return BN_rand_range(r, range);
}

View File

@ -0,0 +1,226 @@
/*
* Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
* Copyright (c) 2012, Intel Corporation. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*
* Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
* (1) Intel Corporation, Israel Development Center, Haifa, Israel
* (2) University of Haifa, Israel
*/
#include "rsaz_exp.h"
#if defined(RSAZ_ENABLED)
#include <CBigNumBoringSSL_mem.h>
#include "internal.h"
#include "../../internal.h"
// one is 1 in RSAZ's representation.
alignas(64) static const BN_ULONG one[40] = {
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// two80 is 2^80 in RSAZ's representation. Note RSAZ uses base 2^29, so this is
// 2^(29*2 + 22) = 2^80, not 2^(64*2 + 22).
alignas(64) static const BN_ULONG two80[40] = {
0, 0, 1 << 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16],
const BN_ULONG base_norm[16],
const BN_ULONG exponent[16],
const BN_ULONG m_norm[16], const BN_ULONG RR[16],
BN_ULONG k0,
BN_ULONG storage[MOD_EXP_CTIME_STORAGE_LEN]) {
OPENSSL_STATIC_ASSERT(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH % 64 == 0,
"MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH is too small");
assert((uintptr_t)storage % 64 == 0);
BN_ULONG *a_inv, *m, *result, *table_s = storage + 40 * 3, *R2 = table_s;
// Note |R2| aliases |table_s|.
if (((((uintptr_t)storage & 4095) + 320) >> 12) != 0) {
result = storage;
a_inv = storage + 40;
m = storage + 40 * 2; // should not cross page
} else {
m = storage; // should not cross page
result = storage + 40;
a_inv = storage + 40 * 2;
}
rsaz_1024_norm2red_avx2(m, m_norm);
rsaz_1024_norm2red_avx2(a_inv, base_norm);
rsaz_1024_norm2red_avx2(R2, RR);
// Convert |R2| from the usual radix, giving R = 2^1024, to RSAZ's radix,
// giving R = 2^(36*29) = 2^1044.
rsaz_1024_mul_avx2(R2, R2, R2, m, k0);
// R2 = 2^2048 * 2^2048 / 2^1044 = 2^3052
rsaz_1024_mul_avx2(R2, R2, two80, m, k0);
// R2 = 2^3052 * 2^80 / 2^1044 = 2^2088 = (2^1044)^2
// table[0] = 1
rsaz_1024_mul_avx2(result, R2, one, m, k0);
// table[1] = a_inv^1
rsaz_1024_mul_avx2(a_inv, a_inv, R2, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 0);
rsaz_1024_scatter5_avx2(table_s, a_inv, 1);
// table[2] = a_inv^2
rsaz_1024_sqr_avx2(result, a_inv, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 2);
#if 0
// This is almost 2x smaller and less than 1% slower.
for (int index = 3; index < 32; index++) {
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, index);
}
#else
// table[4] = a_inv^4
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 4);
// table[8] = a_inv^8
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 8);
// table[16] = a_inv^16
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 16);
// table[17] = a_inv^17
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 17);
// table[3]
rsaz_1024_gather5_avx2(result, table_s, 2);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 3);
// table[6]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 6);
// table[12]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 12);
// table[24]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 24);
// table[25]
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 25);
// table[5]
rsaz_1024_gather5_avx2(result, table_s, 4);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 5);
// table[10]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 10);
// table[20]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 20);
// table[21]
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 21);
// table[7]
rsaz_1024_gather5_avx2(result, table_s, 6);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 7);
// table[14]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 14);
// table[28]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 28);
// table[29]
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 29);
// table[9]
rsaz_1024_gather5_avx2(result, table_s, 8);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 9);
// table[18]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 18);
// table[19]
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 19);
// table[11]
rsaz_1024_gather5_avx2(result, table_s, 10);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 11);
// table[22]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 22);
// table[23]
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 23);
// table[13]
rsaz_1024_gather5_avx2(result, table_s, 12);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 13);
// table[26]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 26);
// table[27]
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 27);
// table[15]
rsaz_1024_gather5_avx2(result, table_s, 14);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 15);
// table[30]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 30);
// table[31]
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 31);
#endif
const uint8_t *p_str = (const uint8_t *)exponent;
// load first window
int wvalue = p_str[127] >> 3;
rsaz_1024_gather5_avx2(result, table_s, wvalue);
int index = 1014;
  while (index > -1) {  // Loop for the remaining 203 windows.
rsaz_1024_sqr_avx2(result, result, m, k0, 5);
uint16_t wvalue_16;
memcpy(&wvalue_16, &p_str[index / 8], sizeof(wvalue_16));
wvalue = wvalue_16;
wvalue = (wvalue >> (index % 8)) & 31;
index -= 5;
rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); // Borrow |a_inv|.
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
}
// Square four times.
rsaz_1024_sqr_avx2(result, result, m, k0, 4);
wvalue = p_str[0] & 15;
rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); // Borrow |a_inv|.
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
// Convert from Montgomery.
rsaz_1024_mul_avx2(result, result, one, m, k0);
rsaz_1024_red2norm_avx2(result_norm, result);
OPENSSL_cleanse(storage, MOD_EXP_CTIME_STORAGE_LEN * sizeof(BN_ULONG));
}
#endif // RSAZ_ENABLED

View File

@ -0,0 +1,104 @@
/*
* Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
* Copyright (c) 2012, Intel Corporation. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*
* Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
* (1) Intel Corporation, Israel Development Center, Haifa, Israel
* (2) University of Haifa, Israel
*/
#ifndef OPENSSL_HEADER_BN_RSAZ_EXP_H
#define OPENSSL_HEADER_BN_RSAZ_EXP_H
#include <CBigNumBoringSSL_bn.h>
#include <CBigNumBoringSSL_cpu.h>
#include "internal.h"
#if defined(__cplusplus)
extern "C" {
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
#define RSAZ_ENABLED
// RSAZ_1024_mod_exp_avx2 sets |result| to |base_norm| raised to |exponent|
// modulo |m_norm|. |base_norm| must be fully-reduced and |exponent| must have
// the high bit set (it is 1024 bits wide). |RR| and |k0| must be |RR| and |n0|,
// respectively, extracted from |m_norm|'s |BN_MONT_CTX|. |storage_words| is a
// temporary buffer that must be aligned to |MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH|
// bytes.
void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16], const BN_ULONG base_norm[16],
const BN_ULONG exponent[16],
const BN_ULONG m_norm[16], const BN_ULONG RR[16],
BN_ULONG k0,
BN_ULONG storage_words[MOD_EXP_CTIME_STORAGE_LEN]);
OPENSSL_INLINE int rsaz_avx2_capable(void) {
const uint32_t *cap = OPENSSL_ia32cap_get();
return (cap[2] & (1 << 5)) != 0; // AVX2
}
OPENSSL_INLINE int rsaz_avx2_preferred(void) {
const uint32_t *cap = OPENSSL_ia32cap_get();
static const uint32_t kBMI2AndADX = (1 << 8) | (1 << 19);
if ((cap[2] & kBMI2AndADX) == kBMI2AndADX) {
// If BMI2 and ADX are available, x86_64-mont5.pl is faster.
return 0;
}
return (cap[2] & (1 << 5)) != 0; // AVX2
}
// Assembly functions.
// RSAZ represents 1024-bit integers using unsaturated 29-bit limbs stored in
// 64-bit integers. This requires 36 limbs but padded up to 40.
//
// See crypto/bn/asm/rsaz-avx2.pl for further details.
// rsaz_1024_norm2red_avx2 converts |norm| from |BIGNUM| to RSAZ representation
// and writes the result to |red|.
void rsaz_1024_norm2red_avx2(BN_ULONG red[40], const BN_ULONG norm[16]);
// rsaz_1024_mul_avx2 computes |a| * |b| mod |n| and writes the result to |ret|.
// Inputs and outputs are in Montgomery form, using RSAZ's representation. |k|
// is -|n|^-1 mod 2^64 or |n0| from |BN_MONT_CTX|.
void rsaz_1024_mul_avx2(BN_ULONG ret[40], const BN_ULONG a[40],
const BN_ULONG b[40], const BN_ULONG n[40], BN_ULONG k);
// rsaz_1024_sqr_avx2 computes |a|^(2^|count|) mod |n| and writes the result to
// |ret|. Inputs and outputs are in Montgomery form, using RSAZ's
// representation. |k| is -|n|^-1 mod 2^64 or |n0| from |BN_MONT_CTX|.
void rsaz_1024_sqr_avx2(BN_ULONG ret[40], const BN_ULONG a[40],
const BN_ULONG n[40], BN_ULONG k, int count);
// rsaz_1024_scatter5_avx2 stores |val| at index |i| of |tbl|. |i| must be
// positive and at most 31. Note the table only uses 18 |BN_ULONG|s per entry
// instead of 40. It packs two 29-bit limbs into each |BN_ULONG| and only stores
// 36 limbs rather than the padded 40.
void rsaz_1024_scatter5_avx2(BN_ULONG tbl[32 * 18], const BN_ULONG val[40],
int i);
// rsaz_1024_gather5_avx2 loads index |i| of |tbl| and writes it to |val|.
void rsaz_1024_gather5_avx2(BN_ULONG val[40], const BN_ULONG tbl[32 * 18],
int i);
// rsaz_1024_red2norm_avx2 converts |red| from RSAZ to |BIGNUM| representation
// and writes the result to |norm|.
void rsaz_1024_red2norm_avx2(BN_ULONG norm[16], const BN_ULONG red[40]);
#endif // !OPENSSL_NO_ASM && OPENSSL_X86_64
#if defined(__cplusplus)
} // extern "C"
#endif
#endif // OPENSSL_HEADER_BN_RSAZ_EXP_H

View File

@ -0,0 +1,364 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_type_check.h>
#include "internal.h"
int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) {
int i, nw, lb, rb;
BN_ULONG *t, *f;
BN_ULONG l;
if (n < 0) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
r->neg = a->neg;
nw = n / BN_BITS2;
if (!bn_wexpand(r, a->width + nw + 1)) {
return 0;
}
lb = n % BN_BITS2;
rb = BN_BITS2 - lb;
f = a->d;
t = r->d;
t[a->width + nw] = 0;
if (lb == 0) {
for (i = a->width - 1; i >= 0; i--) {
t[nw + i] = f[i];
}
} else {
for (i = a->width - 1; i >= 0; i--) {
l = f[i];
t[nw + i + 1] |= l >> rb;
t[nw + i] = l << lb;
}
}
OPENSSL_memset(t, 0, nw * sizeof(t[0]));
r->width = a->width + nw + 1;
bn_set_minimal_width(r);
return 1;
}
int BN_lshift1(BIGNUM *r, const BIGNUM *a) {
BN_ULONG *ap, *rp, t, c;
int i;
if (r != a) {
r->neg = a->neg;
if (!bn_wexpand(r, a->width + 1)) {
return 0;
}
r->width = a->width;
} else {
if (!bn_wexpand(r, a->width + 1)) {
return 0;
}
}
ap = a->d;
rp = r->d;
c = 0;
for (i = 0; i < a->width; i++) {
t = *(ap++);
*(rp++) = (t << 1) | c;
c = t >> (BN_BITS2 - 1);
}
if (c) {
*rp = 1;
r->width++;
}
return 1;
}
void bn_rshift_words(BN_ULONG *r, const BN_ULONG *a, unsigned shift,
size_t num) {
unsigned shift_bits = shift % BN_BITS2;
size_t shift_words = shift / BN_BITS2;
if (shift_words >= num) {
OPENSSL_memset(r, 0, num * sizeof(BN_ULONG));
return;
}
if (shift_bits == 0) {
OPENSSL_memmove(r, a + shift_words, (num - shift_words) * sizeof(BN_ULONG));
} else {
for (size_t i = shift_words; i < num - 1; i++) {
r[i - shift_words] =
(a[i] >> shift_bits) | (a[i + 1] << (BN_BITS2 - shift_bits));
}
r[num - 1 - shift_words] = a[num - 1] >> shift_bits;
}
OPENSSL_memset(r + num - shift_words, 0, shift_words * sizeof(BN_ULONG));
}
int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) {
if (n < 0) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
if (!bn_wexpand(r, a->width)) {
return 0;
}
bn_rshift_words(r->d, a->d, n, a->width);
r->neg = a->neg;
r->width = a->width;
bn_set_minimal_width(r);
return 1;
}
int bn_rshift_secret_shift(BIGNUM *r, const BIGNUM *a, unsigned n,
BN_CTX *ctx) {
int ret = 0;
BN_CTX_start(ctx);
BIGNUM *tmp = BN_CTX_get(ctx);
if (tmp == NULL ||
!BN_copy(r, a) ||
!bn_wexpand(tmp, r->width)) {
goto err;
}
// Shift conditionally by powers of two.
unsigned max_bits = BN_BITS2 * r->width;
for (unsigned i = 0; (max_bits >> i) != 0; i++) {
BN_ULONG mask = (n >> i) & 1;
mask = 0 - mask;
bn_rshift_words(tmp->d, r->d, 1u << i, r->width);
bn_select_words(r->d, mask, tmp->d /* apply shift */,
r->d /* ignore shift */, r->width);
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
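/*
 * The same trick at word scale: apply each power-of-two shift unconditionally
 * and select the result with a mask, so the sequence of operations never
 * depends on the secret shift amount. A standalone 64-bit sketch
 * (illustrative only; |n| is assumed to be in 0..63):
 */
#include <stdint.h>

static uint64_t rshift_secret_sketch(uint64_t x, unsigned n) {
  for (unsigned i = 0; i < 6; i++) {  // shifts of 1, 2, 4, 8, 16, 32
    uint64_t mask = 0 - (uint64_t)((n >> i) & 1);
    // Keep the shifted value if bit i of |n| is set, otherwise keep |x|.
    x = (mask & (x >> (1u << i))) | (~mask & x);
  }
  return x;
}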
void bn_rshift1_words(BN_ULONG *r, const BN_ULONG *a, size_t num) {
if (num == 0) {
return;
}
for (size_t i = 0; i < num - 1; i++) {
r[i] = (a[i] >> 1) | (a[i + 1] << (BN_BITS2 - 1));
}
r[num - 1] = a[num - 1] >> 1;
}
int BN_rshift1(BIGNUM *r, const BIGNUM *a) {
if (!bn_wexpand(r, a->width)) {
return 0;
}
bn_rshift1_words(r->d, a->d, a->width);
r->width = a->width;
r->neg = a->neg;
bn_set_minimal_width(r);
return 1;
}
int BN_set_bit(BIGNUM *a, int n) {
if (n < 0) {
return 0;
}
int i = n / BN_BITS2;
int j = n % BN_BITS2;
if (a->width <= i) {
if (!bn_wexpand(a, i + 1)) {
return 0;
}
for (int k = a->width; k < i + 1; k++) {
a->d[k] = 0;
}
a->width = i + 1;
}
a->d[i] |= (((BN_ULONG)1) << j);
return 1;
}
int BN_clear_bit(BIGNUM *a, int n) {
int i, j;
if (n < 0) {
return 0;
}
i = n / BN_BITS2;
j = n % BN_BITS2;
if (a->width <= i) {
return 0;
}
a->d[i] &= (~(((BN_ULONG)1) << j));
bn_set_minimal_width(a);
return 1;
}
int bn_is_bit_set_words(const BN_ULONG *a, size_t num, unsigned bit) {
unsigned i = bit / BN_BITS2;
unsigned j = bit % BN_BITS2;
if (i >= num) {
return 0;
}
return (a[i] >> j) & 1;
}
int BN_is_bit_set(const BIGNUM *a, int n) {
if (n < 0) {
return 0;
}
return bn_is_bit_set_words(a->d, a->width, n);
}
int BN_mask_bits(BIGNUM *a, int n) {
if (n < 0) {
return 0;
}
int w = n / BN_BITS2;
int b = n % BN_BITS2;
if (w >= a->width) {
return 1;
}
if (b == 0) {
a->width = w;
} else {
a->width = w + 1;
a->d[w] &= ~(BN_MASK2 << b);
}
bn_set_minimal_width(a);
return 1;
}
static int bn_count_low_zero_bits_word(BN_ULONG l) {
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
OPENSSL_STATIC_ASSERT(sizeof(int) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
OPENSSL_STATIC_ASSERT(BN_BITS2 == sizeof(BN_ULONG) * 8,
"BN_ULONG has padding bits");
// C has very bizarre rules for types smaller than an int.
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) >= sizeof(int),
"BN_ULONG gets promoted to int");
crypto_word_t mask;
int bits = 0;
#if BN_BITS2 > 32
// Check if the lower half of |x| are all zero.
mask = constant_time_is_zero_w(l << (BN_BITS2 - 32));
// If the lower half is all zeros, it is included in the bit count and we
// count the upper half. Otherwise, we count the lower half.
bits += 32 & mask;
l = constant_time_select_w(mask, l >> 32, l);
#endif
// The remaining blocks are analogous iterations at lower powers of two.
mask = constant_time_is_zero_w(l << (BN_BITS2 - 16));
bits += 16 & mask;
l = constant_time_select_w(mask, l >> 16, l);
mask = constant_time_is_zero_w(l << (BN_BITS2 - 8));
bits += 8 & mask;
l = constant_time_select_w(mask, l >> 8, l);
mask = constant_time_is_zero_w(l << (BN_BITS2 - 4));
bits += 4 & mask;
l = constant_time_select_w(mask, l >> 4, l);
mask = constant_time_is_zero_w(l << (BN_BITS2 - 2));
bits += 2 & mask;
l = constant_time_select_w(mask, l >> 2, l);
mask = constant_time_is_zero_w(l << (BN_BITS2 - 1));
bits += 1 & mask;
return bits;
}
int BN_count_low_zero_bits(const BIGNUM *bn) {
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
OPENSSL_STATIC_ASSERT(sizeof(int) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
int ret = 0;
crypto_word_t saw_nonzero = 0;
for (int i = 0; i < bn->width; i++) {
crypto_word_t nonzero = ~constant_time_is_zero_w(bn->d[i]);
crypto_word_t first_nonzero = ~saw_nonzero & nonzero;
saw_nonzero |= nonzero;
int bits = bn_count_low_zero_bits_word(bn->d[i]);
ret |= first_nonzero & (i * BN_BITS2 + bits);
}
  // If we got to the end of |bn| and saw no non-zero words, |bn| is zero.
  // |ret| will then remain zero.
return ret;
}

View File

@ -0,0 +1,502 @@
/* Written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
* and Bodo Moeller for the OpenSSL project. */
/* ====================================================================
* Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_bn.h>
#include <CBigNumBoringSSL_err.h>
#include "internal.h"
BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
// Compute a square root of |a| mod |p| using the Tonelli/Shanks algorithm
// (cf. Henri Cohen, "A Course in Algebraic Computational Number Theory",
// algorithm 1.5.1). |p| is assumed to be a prime.
BIGNUM *ret = in;
int err = 1;
int r;
BIGNUM *A, *b, *q, *t, *x, *y;
int e, i, j;
if (!BN_is_odd(p) || BN_abs_is_word(p, 1)) {
if (BN_abs_is_word(p, 2)) {
if (ret == NULL) {
ret = BN_new();
}
if (ret == NULL) {
goto end;
}
if (!BN_set_word(ret, BN_is_bit_set(a, 0))) {
if (ret != in) {
BN_free(ret);
}
return NULL;
}
return ret;
}
OPENSSL_PUT_ERROR(BN, BN_R_P_IS_NOT_PRIME);
return (NULL);
}
if (BN_is_zero(a) || BN_is_one(a)) {
if (ret == NULL) {
ret = BN_new();
}
if (ret == NULL) {
goto end;
}
if (!BN_set_word(ret, BN_is_one(a))) {
if (ret != in) {
BN_free(ret);
}
return NULL;
}
return ret;
}
BN_CTX_start(ctx);
A = BN_CTX_get(ctx);
b = BN_CTX_get(ctx);
q = BN_CTX_get(ctx);
t = BN_CTX_get(ctx);
x = BN_CTX_get(ctx);
y = BN_CTX_get(ctx);
if (y == NULL) {
goto end;
}
if (ret == NULL) {
ret = BN_new();
}
if (ret == NULL) {
goto end;
}
// A = a mod p
if (!BN_nnmod(A, a, p, ctx)) {
goto end;
}
// now write |p| - 1 as 2^e*q where q is odd
e = 1;
while (!BN_is_bit_set(p, e)) {
e++;
}
// we'll set q later (if needed)
if (e == 1) {
// The easy case: (|p|-1)/2 is odd, so 2 has an inverse
// modulo (|p|-1)/2, and square roots can be computed
// directly by modular exponentiation.
// We have
// 2 * (|p|+1)/4 == 1 (mod (|p|-1)/2),
// so we can use exponent (|p|+1)/4, i.e. (|p|-3)/4 + 1.
if (!BN_rshift(q, p, 2)) {
goto end;
}
q->neg = 0;
if (!BN_add_word(q, 1) ||
!BN_mod_exp_mont(ret, A, q, p, ctx, NULL)) {
goto end;
}
err = 0;
goto vrfy;
}
if (e == 2) {
// |p| == 5 (mod 8)
//
// In this case 2 is always a non-square since
// Legendre(2,p) = (-1)^((p^2-1)/8) for any odd prime.
// So if a really is a square, then 2*a is a non-square.
// Thus for
// b := (2*a)^((|p|-5)/8),
// i := (2*a)*b^2
// we have
// i^2 = (2*a)^((1 + (|p|-5)/4)*2)
// = (2*a)^((p-1)/2)
// = -1;
// so if we set
// x := a*b*(i-1),
// then
// x^2 = a^2 * b^2 * (i^2 - 2*i + 1)
// = a^2 * b^2 * (-2*i)
// = a*(-i)*(2*a*b^2)
// = a*(-i)*i
// = a.
//
// (This is due to A.O.L. Atkin,
// <URL:
//http://listserv.nodak.edu/scripts/wa.exe?A2=ind9211&L=nmbrthry&O=T&P=562>,
// November 1992.)
// t := 2*a
if (!bn_mod_lshift1_consttime(t, A, p, ctx)) {
goto end;
}
// b := (2*a)^((|p|-5)/8)
if (!BN_rshift(q, p, 3)) {
goto end;
}
q->neg = 0;
if (!BN_mod_exp_mont(b, t, q, p, ctx, NULL)) {
goto end;
}
// y := b^2
if (!BN_mod_sqr(y, b, p, ctx)) {
goto end;
}
// t := (2*a)*b^2 - 1
if (!BN_mod_mul(t, t, y, p, ctx) ||
!BN_sub_word(t, 1)) {
goto end;
}
// x = a*b*t
if (!BN_mod_mul(x, A, b, p, ctx) ||
!BN_mod_mul(x, x, t, p, ctx)) {
goto end;
}
if (!BN_copy(ret, x)) {
goto end;
}
err = 0;
goto vrfy;
}
// e > 2, so we really have to use the Tonelli/Shanks algorithm.
// First, find some y that is not a square.
if (!BN_copy(q, p)) {
goto end; // use 'q' as temp
}
q->neg = 0;
i = 2;
do {
// For efficiency, try small numbers first;
// if this fails, try random numbers.
if (i < 22) {
if (!BN_set_word(y, i)) {
goto end;
}
} else {
if (!BN_pseudo_rand(y, BN_num_bits(p), 0, 0)) {
goto end;
}
if (BN_ucmp(y, p) >= 0) {
if (!(p->neg ? BN_add : BN_sub)(y, y, p)) {
goto end;
}
}
// now 0 <= y < |p|
if (BN_is_zero(y)) {
if (!BN_set_word(y, i)) {
goto end;
}
}
}
r = bn_jacobi(y, q, ctx); // here 'q' is |p|
if (r < -1) {
goto end;
}
if (r == 0) {
// m divides p
OPENSSL_PUT_ERROR(BN, BN_R_P_IS_NOT_PRIME);
goto end;
}
} while (r == 1 && ++i < 82);
if (r != -1) {
// Many rounds and still no non-square -- this is more likely
// a bug than just bad luck.
// Even if p is not prime, we should have found some y
// such that r == -1.
OPENSSL_PUT_ERROR(BN, BN_R_TOO_MANY_ITERATIONS);
goto end;
}
// Here's our actual 'q':
if (!BN_rshift(q, q, e)) {
goto end;
}
// Now that we have some non-square, we can find an element
// of order 2^e by computing its q'th power.
if (!BN_mod_exp_mont(y, y, q, p, ctx, NULL)) {
goto end;
}
if (BN_is_one(y)) {
OPENSSL_PUT_ERROR(BN, BN_R_P_IS_NOT_PRIME);
goto end;
}
// Now we know that (if p is indeed prime) there is an integer
// k, 0 <= k < 2^e, such that
//
// a^q * y^k == 1 (mod p).
//
// As a^q is a square and y is not, k must be even.
// q+1 is even, too, so there is an element
//
// X := a^((q+1)/2) * y^(k/2),
//
// and it satisfies
//
// X^2 = a^q * a * y^k
// = a,
//
// so it is the square root that we are looking for.
// t := (q-1)/2 (note that q is odd)
if (!BN_rshift1(t, q)) {
goto end;
}
// x := a^((q-1)/2)
if (BN_is_zero(t)) // special case: p = 2^e + 1
{
if (!BN_nnmod(t, A, p, ctx)) {
goto end;
}
if (BN_is_zero(t)) {
// special case: a == 0 (mod p)
BN_zero(ret);
err = 0;
goto end;
} else if (!BN_one(x)) {
goto end;
}
} else {
if (!BN_mod_exp_mont(x, A, t, p, ctx, NULL)) {
goto end;
}
if (BN_is_zero(x)) {
// special case: a == 0 (mod p)
BN_zero(ret);
err = 0;
goto end;
}
}
// b := a*x^2 (= a^q)
if (!BN_mod_sqr(b, x, p, ctx) ||
!BN_mod_mul(b, b, A, p, ctx)) {
goto end;
}
// x := a*x (= a^((q+1)/2))
if (!BN_mod_mul(x, x, A, p, ctx)) {
goto end;
}
while (1) {
// Now b is a^q * y^k for some even k (0 <= k < 2^E
// where E refers to the original value of e, which we
// don't keep in a variable), and x is a^((q+1)/2) * y^(k/2).
//
// We have a*b = x^2,
// y^2^(e-1) = -1,
// b^2^(e-1) = 1.
if (BN_is_one(b)) {
if (!BN_copy(ret, x)) {
goto end;
}
err = 0;
goto vrfy;
}
// find smallest i such that b^(2^i) = 1
i = 1;
if (!BN_mod_sqr(t, b, p, ctx)) {
goto end;
}
while (!BN_is_one(t)) {
i++;
if (i == e) {
OPENSSL_PUT_ERROR(BN, BN_R_NOT_A_SQUARE);
goto end;
}
if (!BN_mod_mul(t, t, t, p, ctx)) {
goto end;
}
}
// t := y^2^(e - i - 1)
if (!BN_copy(t, y)) {
goto end;
}
for (j = e - i - 1; j > 0; j--) {
if (!BN_mod_sqr(t, t, p, ctx)) {
goto end;
}
}
if (!BN_mod_mul(y, t, t, p, ctx) ||
!BN_mod_mul(x, x, t, p, ctx) ||
!BN_mod_mul(b, b, y, p, ctx)) {
goto end;
}
e = i;
}
vrfy:
if (!err) {
// verify the result -- the input might have been not a square
// (test added in 0.9.8)
if (!BN_mod_sqr(x, ret, p, ctx)) {
err = 1;
}
if (!err && 0 != BN_cmp(x, A)) {
OPENSSL_PUT_ERROR(BN, BN_R_NOT_A_SQUARE);
err = 1;
}
}
end:
if (err) {
if (ret != in) {
BN_clear_free(ret);
}
ret = NULL;
}
BN_CTX_end(ctx);
return ret;
}
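/*
 * Usage sketch for BN_mod_sqrt: p = 23 is congruent to 3 mod 4, so the easy
 * exponentiation path above applies and sqrt(4) mod 23 is 2 (or the other
 * root, 21). Illustrative only; error handling abbreviated:
 */
#include <CBigNumBoringSSL_bn.h>

static void bn_mod_sqrt_usage_sketch(void) {
  BN_CTX *ctx = BN_CTX_new();
  BIGNUM *p = BN_new(), *a = BN_new();
  BN_set_word(p, 23);
  BN_set_word(a, 4);
  BIGNUM *r = BN_mod_sqrt(NULL, a, p, ctx);  // passing NULL allocates the result
  if (r != NULL) {
    BN_free(r);
  }
  BN_free(a);
  BN_free(p);
  BN_CTX_free(ctx);
}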
int BN_sqrt(BIGNUM *out_sqrt, const BIGNUM *in, BN_CTX *ctx) {
BIGNUM *estimate, *tmp, *delta, *last_delta, *tmp2;
int ok = 0, last_delta_valid = 0;
if (in->neg) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
if (BN_is_zero(in)) {
BN_zero(out_sqrt);
return 1;
}
BN_CTX_start(ctx);
if (out_sqrt == in) {
estimate = BN_CTX_get(ctx);
} else {
estimate = out_sqrt;
}
tmp = BN_CTX_get(ctx);
last_delta = BN_CTX_get(ctx);
delta = BN_CTX_get(ctx);
if (estimate == NULL || tmp == NULL || last_delta == NULL || delta == NULL) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
goto err;
}
// We estimate that the square root of an n-bit number is 2^{n/2}.
if (!BN_lshift(estimate, BN_value_one(), BN_num_bits(in)/2)) {
goto err;
}
// This is Newton's method for finding a root of the equation |estimate|^2 -
// |in| = 0.
for (;;) {
// |estimate| = 1/2 * (|estimate| + |in|/|estimate|)
if (!BN_div(tmp, NULL, in, estimate, ctx) ||
!BN_add(tmp, tmp, estimate) ||
!BN_rshift1(estimate, tmp) ||
// |tmp| = |estimate|^2
!BN_sqr(tmp, estimate, ctx) ||
// |delta| = |in| - |tmp|
!BN_sub(delta, in, tmp)) {
OPENSSL_PUT_ERROR(BN, ERR_R_BN_LIB);
goto err;
}
delta->neg = 0;
// The difference between |in| and |estimate| squared is required to always
// decrease. This ensures that the loop always terminates, but I don't have
// a proof that it always finds the square root for a given square.
if (last_delta_valid && BN_cmp(delta, last_delta) >= 0) {
break;
}
last_delta_valid = 1;
tmp2 = last_delta;
last_delta = delta;
delta = tmp2;
}
if (BN_cmp(tmp, in) != 0) {
OPENSSL_PUT_ERROR(BN, BN_R_NOT_A_SQUARE);
goto err;
}
ok = 1;
err:
if (ok && out_sqrt == in && !BN_copy(out_sqrt, estimate)) {
ok = 0;
}
BN_CTX_end(ctx);
return ok;
}
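/*
 * The same Newton iteration at word scale: starting from an estimate at or
 * above the true root, the estimate decreases monotonically to
 * floor(sqrt(n)). A standalone 64-bit sketch (illustrative only; the BIGNUM
 * version above additionally verifies the input was a perfect square):
 */
#include <stdint.h>

static uint64_t isqrt64_sketch(uint64_t n) {
  if (n < 2) {
    return n;
  }
  uint64_t x = n;
  uint64_t y = (x + n / x) / 2;
  while (y < x) {  // the estimate strictly decreases until it converges
    x = y;
    y = (x + n / x) / 2;
  }
  return x;  // floor(sqrt(n))
}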

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,620 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_cipher.h>
#include <assert.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_nid.h>
#include "internal.h"
#include "../../internal.h"
void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *ctx) {
OPENSSL_memset(ctx, 0, sizeof(EVP_CIPHER_CTX));
}
EVP_CIPHER_CTX *EVP_CIPHER_CTX_new(void) {
EVP_CIPHER_CTX *ctx = OPENSSL_malloc(sizeof(EVP_CIPHER_CTX));
if (ctx) {
EVP_CIPHER_CTX_init(ctx);
}
return ctx;
}
int EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *c) {
if (c->cipher != NULL && c->cipher->cleanup) {
c->cipher->cleanup(c);
}
OPENSSL_free(c->cipher_data);
OPENSSL_memset(c, 0, sizeof(EVP_CIPHER_CTX));
return 1;
}
void EVP_CIPHER_CTX_free(EVP_CIPHER_CTX *ctx) {
if (ctx) {
EVP_CIPHER_CTX_cleanup(ctx);
OPENSSL_free(ctx);
}
}
int EVP_CIPHER_CTX_copy(EVP_CIPHER_CTX *out, const EVP_CIPHER_CTX *in) {
if (in == NULL || in->cipher == NULL) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_INPUT_NOT_INITIALIZED);
return 0;
}
EVP_CIPHER_CTX_cleanup(out);
OPENSSL_memcpy(out, in, sizeof(EVP_CIPHER_CTX));
if (in->cipher_data && in->cipher->ctx_size) {
out->cipher_data = OPENSSL_malloc(in->cipher->ctx_size);
if (!out->cipher_data) {
out->cipher = NULL;
OPENSSL_PUT_ERROR(CIPHER, ERR_R_MALLOC_FAILURE);
return 0;
}
OPENSSL_memcpy(out->cipher_data, in->cipher_data, in->cipher->ctx_size);
}
if (in->cipher->flags & EVP_CIPH_CUSTOM_COPY) {
if (!in->cipher->ctrl((EVP_CIPHER_CTX *)in, EVP_CTRL_COPY, 0, out)) {
out->cipher = NULL;
return 0;
}
}
return 1;
}
int EVP_CIPHER_CTX_reset(EVP_CIPHER_CTX *ctx) {
EVP_CIPHER_CTX_cleanup(ctx);
EVP_CIPHER_CTX_init(ctx);
return 1;
}
int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
ENGINE *engine, const uint8_t *key, const uint8_t *iv,
int enc) {
if (enc == -1) {
enc = ctx->encrypt;
} else {
if (enc) {
enc = 1;
}
ctx->encrypt = enc;
}
if (cipher) {
// Ensure a context left from last time is cleared (the previous check
// attempted to avoid this if the same ENGINE and EVP_CIPHER could be
// used).
if (ctx->cipher) {
EVP_CIPHER_CTX_cleanup(ctx);
// Restore encrypt and flags
ctx->encrypt = enc;
}
ctx->cipher = cipher;
if (ctx->cipher->ctx_size) {
ctx->cipher_data = OPENSSL_malloc(ctx->cipher->ctx_size);
if (!ctx->cipher_data) {
ctx->cipher = NULL;
OPENSSL_PUT_ERROR(CIPHER, ERR_R_MALLOC_FAILURE);
return 0;
}
} else {
ctx->cipher_data = NULL;
}
ctx->key_len = cipher->key_len;
ctx->flags = 0;
if (ctx->cipher->flags & EVP_CIPH_CTRL_INIT) {
if (!EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_INIT, 0, NULL)) {
ctx->cipher = NULL;
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_INITIALIZATION_ERROR);
return 0;
}
}
} else if (!ctx->cipher) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_NO_CIPHER_SET);
return 0;
}
// we assume block size is a power of 2 in *cryptUpdate
assert(ctx->cipher->block_size == 1 || ctx->cipher->block_size == 8 ||
ctx->cipher->block_size == 16);
if (!(EVP_CIPHER_CTX_flags(ctx) & EVP_CIPH_CUSTOM_IV)) {
switch (EVP_CIPHER_CTX_mode(ctx)) {
case EVP_CIPH_STREAM_CIPHER:
case EVP_CIPH_ECB_MODE:
break;
case EVP_CIPH_CFB_MODE:
ctx->num = 0;
OPENSSL_FALLTHROUGH;
case EVP_CIPH_CBC_MODE:
assert(EVP_CIPHER_CTX_iv_length(ctx) <= sizeof(ctx->iv));
if (iv) {
OPENSSL_memcpy(ctx->oiv, iv, EVP_CIPHER_CTX_iv_length(ctx));
}
OPENSSL_memcpy(ctx->iv, ctx->oiv, EVP_CIPHER_CTX_iv_length(ctx));
break;
case EVP_CIPH_CTR_MODE:
case EVP_CIPH_OFB_MODE:
ctx->num = 0;
// Don't reuse IV for CTR mode
if (iv) {
OPENSSL_memcpy(ctx->iv, iv, EVP_CIPHER_CTX_iv_length(ctx));
}
break;
default:
return 0;
}
}
if (key || (ctx->cipher->flags & EVP_CIPH_ALWAYS_CALL_INIT)) {
if (!ctx->cipher->init(ctx, key, iv, enc)) {
return 0;
}
}
ctx->buf_len = 0;
ctx->final_used = 0;
ctx->block_mask = ctx->cipher->block_size - 1;
return 1;
}
int EVP_EncryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
ENGINE *impl, const uint8_t *key, const uint8_t *iv) {
return EVP_CipherInit_ex(ctx, cipher, impl, key, iv, 1);
}
int EVP_DecryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
ENGINE *impl, const uint8_t *key, const uint8_t *iv) {
return EVP_CipherInit_ex(ctx, cipher, impl, key, iv, 0);
}
int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len,
const uint8_t *in, int in_len) {
int i, j, bl;
if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) {
i = ctx->cipher->cipher(ctx, out, in, in_len);
if (i < 0) {
return 0;
} else {
*out_len = i;
}
return 1;
}
if (in_len <= 0) {
*out_len = 0;
return in_len == 0;
}
if (ctx->buf_len == 0 && (in_len & ctx->block_mask) == 0) {
if (ctx->cipher->cipher(ctx, out, in, in_len)) {
*out_len = in_len;
return 1;
} else {
*out_len = 0;
return 0;
}
}
i = ctx->buf_len;
bl = ctx->cipher->block_size;
assert(bl <= (int)sizeof(ctx->buf));
if (i != 0) {
if (bl - i > in_len) {
OPENSSL_memcpy(&ctx->buf[i], in, in_len);
ctx->buf_len += in_len;
*out_len = 0;
return 1;
} else {
j = bl - i;
OPENSSL_memcpy(&ctx->buf[i], in, j);
if (!ctx->cipher->cipher(ctx, out, ctx->buf, bl)) {
return 0;
}
in_len -= j;
in += j;
out += bl;
*out_len = bl;
}
} else {
*out_len = 0;
}
i = in_len & ctx->block_mask;
in_len -= i;
if (in_len > 0) {
if (!ctx->cipher->cipher(ctx, out, in, in_len)) {
return 0;
}
*out_len += in_len;
}
if (i != 0) {
OPENSSL_memcpy(ctx->buf, &in[in_len], i);
}
ctx->buf_len = i;
return 1;
}
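// Worked example of the split above, with illustrative numbers: block size 16,
// ctx->buf_len == 5 and in_len == 40 on entry. First j = 16 - 5 = 11 input
// bytes complete the buffered block (16 bytes out). Of the remaining 29 bytes,
// 29 & 15 = 13 are stashed in ctx->buf for the next call and 29 - 13 = 16 are
// encrypted directly (16 more bytes out). The call therefore writes 32 bytes
// and leaves ctx->buf_len == 13.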
int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len) {
int n, ret;
unsigned int i, b, bl;
if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) {
ret = ctx->cipher->cipher(ctx, out, NULL, 0);
if (ret < 0) {
return 0;
} else {
*out_len = ret;
}
return 1;
}
b = ctx->cipher->block_size;
assert(b <= sizeof(ctx->buf));
if (b == 1) {
*out_len = 0;
return 1;
}
bl = ctx->buf_len;
if (ctx->flags & EVP_CIPH_NO_PADDING) {
if (bl) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_DATA_NOT_MULTIPLE_OF_BLOCK_LENGTH);
return 0;
}
*out_len = 0;
return 1;
}
n = b - bl;
for (i = bl; i < b; i++) {
ctx->buf[i] = n;
}
ret = ctx->cipher->cipher(ctx, out, ctx->buf, b);
if (ret) {
*out_len = b;
}
return ret;
}
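// The fill loop above is PKCS#7-style padding: every pad byte equals the pad
// length, and a message that is already block-aligned gets a full block of
// padding. An illustrative helper (not a BoringSSL API) applying the same rule
// to a plain buffer:
static size_t pkcs7_pad(uint8_t *buf, size_t len, size_t block_size) {
  // |buf| must have room for |len| rounded up to a multiple of |block_size|;
  // |block_size| must be between 1 and 255.
  size_t pad = block_size - (len % block_size);  // always 1..block_size
  OPENSSL_memset(buf + len, (uint8_t)pad, pad);
  return len + pad;
}
// For a 13-byte message and 16-byte blocks this appends three 0x03 bytes; for
// a 16-byte message it appends a full block of 0x10 bytes.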
int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len,
const uint8_t *in, int in_len) {
int fix_len;
unsigned int b;
if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) {
int r = ctx->cipher->cipher(ctx, out, in, in_len);
if (r < 0) {
*out_len = 0;
return 0;
} else {
*out_len = r;
}
return 1;
}
if (in_len <= 0) {
*out_len = 0;
return in_len == 0;
}
if (ctx->flags & EVP_CIPH_NO_PADDING) {
return EVP_EncryptUpdate(ctx, out, out_len, in, in_len);
}
b = ctx->cipher->block_size;
assert(b <= sizeof(ctx->final));
if (ctx->final_used) {
OPENSSL_memcpy(out, ctx->final, b);
out += b;
fix_len = 1;
} else {
fix_len = 0;
}
if (!EVP_EncryptUpdate(ctx, out, out_len, in, in_len)) {
return 0;
}
// if we have 'decrypted' a multiple of block size, make sure
// we have a copy of this last block
if (b > 1 && !ctx->buf_len) {
*out_len -= b;
ctx->final_used = 1;
OPENSSL_memcpy(ctx->final, &out[*out_len], b);
} else {
ctx->final_used = 0;
}
if (fix_len) {
*out_len += b;
}
return 1;
}
int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *out_len) {
int i, n;
unsigned int b;
*out_len = 0;
if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) {
i = ctx->cipher->cipher(ctx, out, NULL, 0);
if (i < 0) {
return 0;
} else {
*out_len = i;
}
return 1;
}
b = ctx->cipher->block_size;
if (ctx->flags & EVP_CIPH_NO_PADDING) {
if (ctx->buf_len) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_DATA_NOT_MULTIPLE_OF_BLOCK_LENGTH);
return 0;
}
*out_len = 0;
return 1;
}
if (b > 1) {
if (ctx->buf_len || !ctx->final_used) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_WRONG_FINAL_BLOCK_LENGTH);
return 0;
}
assert(b <= sizeof(ctx->final));
// The following assumes that the ciphertext has been authenticated.
// Otherwise it provides a padding oracle.
n = ctx->final[b - 1];
if (n == 0 || n > (int)b) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
return 0;
}
for (i = 0; i < n; i++) {
if (ctx->final[--b] != n) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
return 0;
}
}
n = ctx->cipher->block_size - n;
for (i = 0; i < n; i++) {
out[i] = ctx->final[i];
}
*out_len = n;
} else {
*out_len = 0;
}
return 1;
}
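// Worked example of the padding check above, with illustrative values: for a
// 16-byte block whose final byte is 0x03, the last three bytes must all equal
// 0x03, and the first 16 - 3 = 13 bytes are returned as plaintext. A final
// byte of 0x00, or any value above 0x10, is rejected as a bad decrypt.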
int EVP_Cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, const uint8_t *in,
size_t in_len) {
return ctx->cipher->cipher(ctx, out, in, in_len);
}
int EVP_CipherUpdate(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len,
const uint8_t *in, int in_len) {
if (ctx->encrypt) {
return EVP_EncryptUpdate(ctx, out, out_len, in, in_len);
} else {
return EVP_DecryptUpdate(ctx, out, out_len, in, in_len);
}
}
int EVP_CipherFinal_ex(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len) {
if (ctx->encrypt) {
return EVP_EncryptFinal_ex(ctx, out, out_len);
} else {
return EVP_DecryptFinal_ex(ctx, out, out_len);
}
}
const EVP_CIPHER *EVP_CIPHER_CTX_cipher(const EVP_CIPHER_CTX *ctx) {
return ctx->cipher;
}
int EVP_CIPHER_CTX_nid(const EVP_CIPHER_CTX *ctx) {
return ctx->cipher->nid;
}
int EVP_CIPHER_CTX_encrypting(const EVP_CIPHER_CTX *ctx) {
return ctx->encrypt;
}
unsigned EVP_CIPHER_CTX_block_size(const EVP_CIPHER_CTX *ctx) {
return ctx->cipher->block_size;
}
unsigned EVP_CIPHER_CTX_key_length(const EVP_CIPHER_CTX *ctx) {
return ctx->key_len;
}
unsigned EVP_CIPHER_CTX_iv_length(const EVP_CIPHER_CTX *ctx) {
return ctx->cipher->iv_len;
}
void *EVP_CIPHER_CTX_get_app_data(const EVP_CIPHER_CTX *ctx) {
return ctx->app_data;
}
void EVP_CIPHER_CTX_set_app_data(EVP_CIPHER_CTX *ctx, void *data) {
ctx->app_data = data;
}
uint32_t EVP_CIPHER_CTX_flags(const EVP_CIPHER_CTX *ctx) {
return ctx->cipher->flags & ~EVP_CIPH_MODE_MASK;
}
uint32_t EVP_CIPHER_CTX_mode(const EVP_CIPHER_CTX *ctx) {
return ctx->cipher->flags & EVP_CIPH_MODE_MASK;
}
int EVP_CIPHER_CTX_ctrl(EVP_CIPHER_CTX *ctx, int command, int arg, void *ptr) {
int ret;
if (!ctx->cipher) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_NO_CIPHER_SET);
return 0;
}
if (!ctx->cipher->ctrl) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_CTRL_NOT_IMPLEMENTED);
return 0;
}
ret = ctx->cipher->ctrl(ctx, command, arg, ptr);
if (ret == -1) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_CTRL_OPERATION_NOT_IMPLEMENTED);
return 0;
}
return ret;
}
int EVP_CIPHER_CTX_set_padding(EVP_CIPHER_CTX *ctx, int pad) {
if (pad) {
ctx->flags &= ~EVP_CIPH_NO_PADDING;
} else {
ctx->flags |= EVP_CIPH_NO_PADDING;
}
return 1;
}
int EVP_CIPHER_CTX_set_key_length(EVP_CIPHER_CTX *c, unsigned key_len) {
if (c->key_len == key_len) {
return 1;
}
if (key_len == 0 || !(c->cipher->flags & EVP_CIPH_VARIABLE_LENGTH)) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_INVALID_KEY_LENGTH);
return 0;
}
c->key_len = key_len;
return 1;
}
int EVP_CIPHER_nid(const EVP_CIPHER *cipher) { return cipher->nid; }
unsigned EVP_CIPHER_block_size(const EVP_CIPHER *cipher) {
return cipher->block_size;
}
unsigned EVP_CIPHER_key_length(const EVP_CIPHER *cipher) {
return cipher->key_len;
}
unsigned EVP_CIPHER_iv_length(const EVP_CIPHER *cipher) {
return cipher->iv_len;
}
uint32_t EVP_CIPHER_flags(const EVP_CIPHER *cipher) {
return cipher->flags & ~EVP_CIPH_MODE_MASK;
}
uint32_t EVP_CIPHER_mode(const EVP_CIPHER *cipher) {
return cipher->flags & EVP_CIPH_MODE_MASK;
}
int EVP_CipherInit(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
const uint8_t *key, const uint8_t *iv, int enc) {
if (cipher) {
EVP_CIPHER_CTX_init(ctx);
}
return EVP_CipherInit_ex(ctx, cipher, NULL, key, iv, enc);
}
int EVP_EncryptInit(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
const uint8_t *key, const uint8_t *iv) {
return EVP_CipherInit(ctx, cipher, key, iv, 1);
}
int EVP_DecryptInit(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
const uint8_t *key, const uint8_t *iv) {
return EVP_CipherInit(ctx, cipher, key, iv, 0);
}
int EVP_add_cipher_alias(const char *a, const char *b) {
return 1;
}
void EVP_CIPHER_CTX_set_flags(const EVP_CIPHER_CTX *ctx, uint32_t flags) {}
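Putting the pieces together, the usual calling pattern for this API is init / update / final against a heap-allocated context. A minimal sketch follows, assuming an AES-128-CBC EVP_CIPHER such as EVP_aes_128_cbc() is available in this vendored build and that |out| has room for the plaintext plus one block of padding; the function name is illustrative.

#include <stdint.h>
#include <CBigNumBoringSSL_cipher.h>

// Illustrative one-shot CBC encryption using the functions defined above.
static int encrypt_cbc_example(const uint8_t key[16], const uint8_t iv[16],
                               const uint8_t *in, int in_len,
                               uint8_t *out, int *out_len) {
  EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
  int len = 0, total = 0, ok = 0;
  if (ctx == NULL) {
    return 0;
  }
  if (EVP_EncryptInit_ex(ctx, EVP_aes_128_cbc(), NULL, key, iv) &&
      EVP_EncryptUpdate(ctx, out, &len, in, in_len)) {
    total = len;
    if (EVP_EncryptFinal_ex(ctx, out + total, &len)) {
      total += len;
      *out_len = total;
      ok = 1;
    }
  }
  EVP_CIPHER_CTX_free(ctx);
  return ok;
}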

File diff suppressed because it is too large

View File

@ -0,0 +1,128 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#ifndef OPENSSL_HEADER_CIPHER_INTERNAL_H
#define OPENSSL_HEADER_CIPHER_INTERNAL_H
#include <CBigNumBoringSSL_base.h>
#include <CBigNumBoringSSL_aead.h>
#include <CBigNumBoringSSL_aes.h>
#include "../../internal.h"
#include "../modes/internal.h"
#if defined(__cplusplus)
extern "C" {
#endif
// EVP_CIPH_MODE_MASK contains the bits of |flags| that represent the mode.
#define EVP_CIPH_MODE_MASK 0x3f
// EVP_AEAD represents a specific AEAD algorithm.
struct evp_aead_st {
uint8_t key_len;
uint8_t nonce_len;
uint8_t overhead;
uint8_t max_tag_len;
int seal_scatter_supports_extra_in;
// init initialises an |EVP_AEAD_CTX|. If this call returns zero then
// |cleanup| will not be called for that context.
int (*init)(EVP_AEAD_CTX *, const uint8_t *key, size_t key_len,
size_t tag_len);
int (*init_with_direction)(EVP_AEAD_CTX *, const uint8_t *key, size_t key_len,
size_t tag_len, enum evp_aead_direction_t dir);
void (*cleanup)(EVP_AEAD_CTX *);
int (*open)(const EVP_AEAD_CTX *ctx, uint8_t *out, size_t *out_len,
size_t max_out_len, const uint8_t *nonce, size_t nonce_len,
const uint8_t *in, size_t in_len, const uint8_t *ad,
size_t ad_len);
int (*seal_scatter)(const EVP_AEAD_CTX *ctx, uint8_t *out, uint8_t *out_tag,
size_t *out_tag_len, size_t max_out_tag_len,
const uint8_t *nonce, size_t nonce_len, const uint8_t *in,
size_t in_len, const uint8_t *extra_in,
size_t extra_in_len, const uint8_t *ad, size_t ad_len);
int (*open_gather)(const EVP_AEAD_CTX *ctx, uint8_t *out,
const uint8_t *nonce, size_t nonce_len, const uint8_t *in,
size_t in_len, const uint8_t *in_tag, size_t in_tag_len,
const uint8_t *ad, size_t ad_len);
int (*get_iv)(const EVP_AEAD_CTX *ctx, const uint8_t **out_iv,
size_t *out_len);
  size_t (*tag_len)(const EVP_AEAD_CTX *ctx, size_t in_len,
size_t extra_in_len);
};
// aes_ctr_set_key initialises |*aes_key| using |key_bytes| bytes from |key|,
// where |key_bytes| must either be 16, 24 or 32. If not NULL, |*out_block| is
// set to a function that encrypts single blocks. If not NULL, |*gcm_key| is
// initialised to do GHASH with the given key. It returns a function for
// optimised CTR-mode, or NULL if CTR-mode should be built using |*out_block|.
ctr128_f aes_ctr_set_key(AES_KEY *aes_key, GCM128_KEY *gcm_key,
block128_f *out_block, const uint8_t *key,
size_t key_bytes);
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_CIPHER_INTERNAL_H

File diff suppressed because it is too large

View File

@ -0,0 +1,89 @@
/* Copyright (c) 2017, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#ifndef OPENSSL_HEADER_FIPSMODULE_DELOCATE_H
#define OPENSSL_HEADER_FIPSMODULE_DELOCATE_H
#include <CBigNumBoringSSL_base.h>
#include "../internal.h"
#if !defined(BORINGSSL_SHARED_LIBRARY) && defined(BORINGSSL_FIPS) && \
!defined(OPENSSL_ASAN) && !defined(OPENSSL_MSAN)
#define DEFINE_BSS_GET(type, name) \
static type name __attribute__((used)); \
type *name##_bss_get(void) __attribute__((const));
// For FIPS builds we require that CRYPTO_ONCE_INIT be zero.
#define DEFINE_STATIC_ONCE(name) DEFINE_BSS_GET(CRYPTO_once_t, name)
// For FIPS builds we require that CRYPTO_STATIC_MUTEX_INIT be zero.
#define DEFINE_STATIC_MUTEX(name) \
DEFINE_BSS_GET(struct CRYPTO_STATIC_MUTEX, name)
// For FIPS builds we require that CRYPTO_EX_DATA_CLASS_INIT be zero.
#define DEFINE_STATIC_EX_DATA_CLASS(name) \
DEFINE_BSS_GET(CRYPTO_EX_DATA_CLASS, name)
#else
#define DEFINE_BSS_GET(type, name) \
static type name; \
static type *name##_bss_get(void) { return &name; }
#define DEFINE_STATIC_ONCE(name) \
static CRYPTO_once_t name = CRYPTO_ONCE_INIT; \
static CRYPTO_once_t *name##_bss_get(void) { return &name; }
#define DEFINE_STATIC_MUTEX(name) \
static struct CRYPTO_STATIC_MUTEX name = CRYPTO_STATIC_MUTEX_INIT; \
static struct CRYPTO_STATIC_MUTEX *name##_bss_get(void) { return &name; }
#define DEFINE_STATIC_EX_DATA_CLASS(name) \
static CRYPTO_EX_DATA_CLASS name = CRYPTO_EX_DATA_CLASS_INIT; \
static CRYPTO_EX_DATA_CLASS *name##_bss_get(void) { return &name; }
#endif
#define DEFINE_DATA(type, name, accessor_decorations) \
DEFINE_BSS_GET(type, name##_storage) \
DEFINE_STATIC_ONCE(name##_once) \
static void name##_do_init(type *out); \
static void name##_init(void) { name##_do_init(name##_storage_bss_get()); } \
accessor_decorations type *name(void) { \
CRYPTO_once(name##_once_bss_get(), name##_init); \
/* See http://c-faq.com/ansi/constmismatch.html for why the following \
* cast is needed. */ \
return (const type *)name##_storage_bss_get(); \
} \
static void name##_do_init(type *out)
// DEFINE_METHOD_FUNCTION defines a function named |name| which returns a
// method table of type const |type|*. In FIPS mode, to avoid rel.ro data, it
// is split into a CRYPTO_once_t-guarded initializer in the module and
// unhashed, non-module accessor functions to space reserved in the BSS. The
// method table is initialized by a caller-supplied function which takes a
// parameter named |out| of type |type|*. The caller should follow the macro
// invocation with the body of this function:
//
// DEFINE_METHOD_FUNCTION(EVP_MD, EVP_md4) {
// out->type = NID_md4;
// out->md_size = MD4_DIGEST_LENGTH;
// out->flags = 0;
// out->init = md4_init;
// out->update = md4_update;
// out->final = md4_final;
// out->block_size = 64;
// out->ctx_size = sizeof(MD4_CTX);
// }
//
// This mechanism does not use a static initializer because their execution
// order is undefined. See FIPS.md for more details.
#define DEFINE_METHOD_FUNCTION(type, name) DEFINE_DATA(type, name, const)
#define DEFINE_LOCAL_DATA(type, name) DEFINE_DATA(type, name, static const)
#endif // OPENSSL_HEADER_FIPSMODULE_DELOCATE_H
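DEFINE_LOCAL_DATA follows the same pattern as the DEFINE_METHOD_FUNCTION example in the comment above, but the accessor it generates is file-local. A hypothetical use is sketched below; the struct name and field values are invented purely for illustration, only the macro usage mirrors the header.

// Hypothetical consumer of DEFINE_LOCAL_DATA (not BoringSSL code).
struct example_params_st {
  unsigned field_bits;
  unsigned cofactor;
};

DEFINE_LOCAL_DATA(struct example_params_st, example_params) {
  out->field_bits = 256;
  out->cofactor = 1;
}

// Callers in the same file then write:
//   const struct example_params_st *p = example_params();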

View File

@ -0,0 +1,265 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__APPLE__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
@ instructions are in aesv8-armx.pl.)
.text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
#define ldrplb ldrbpl
#define ldrneb ldrbne
#endif
#if defined(__thumb2__)
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.globl _gcm_init_neon
.private_extern _gcm_init_neon
#ifdef __thumb2__
.thumb_func _gcm_init_neon
#endif
.align 4
_gcm_init_neon:
vld1.64 d7,[r1]! @ load H
vmov.i8 q8,#0xe1
vld1.64 d6,[r1]
vshl.i64 d17,#57
vshr.u64 d16,#63 @ t0=0xc2....01
vdup.8 q9,d7[7]
vshr.u64 d26,d6,#63
vshr.s8 q9,#7 @ broadcast carry bit
vshl.i64 q3,q3,#1
vand q8,q8,q9
vorr d7,d26 @ H<<<=1
veor q3,q3,q8 @ twisted H
vstmia r0,{q3}
bx lr @ bx lr
.globl _gcm_gmult_neon
.private_extern _gcm_gmult_neon
#ifdef __thumb2__
.thumb_func _gcm_gmult_neon
#endif
.align 4
_gcm_gmult_neon:
vld1.64 d7,[r0]! @ load Xi
vld1.64 d6,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
mov r3,#16
b Lgmult_neon
.globl _gcm_ghash_neon
.private_extern _gcm_ghash_neon
#ifdef __thumb2__
.thumb_func _gcm_ghash_neon
#endif
.align 4
_gcm_ghash_neon:
vld1.64 d1,[r0]! @ load Xi
vld1.64 d0,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
Loop_neon:
vld1.64 d7,[r2]! @ load inp
vld1.64 d6,[r2]!
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
veor q3,q0 @ inp^=Xi
Lgmult_neon:
vext.8 d16, d26, d26, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d0, d6, d6, #1 @ B1
vmull.p8 q0, d26, d0 @ E = A*B1
vext.8 d18, d26, d26, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d26, d22 @ G = A*B2
vext.8 d20, d26, d26, #3 @ A3
veor q8, q8, q0 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d0, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q0, d26, d0 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d26, d22 @ K = A*B4
veor q10, q10, q0 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q0, d26, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q0, q0, q8
veor q0, q0, q10
veor d6,d6,d7 @ Karatsuba pre-processing
vext.8 d16, d28, d28, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d2, d6, d6, #1 @ B1
vmull.p8 q1, d28, d2 @ E = A*B1
vext.8 d18, d28, d28, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d28, d22 @ G = A*B2
vext.8 d20, d28, d28, #3 @ A3
veor q8, q8, q1 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d2, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q1, d28, d2 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d28, d22 @ K = A*B4
veor q10, q10, q1 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q1, d28, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q1, q1, q8
veor q1, q1, q10
vext.8 d16, d27, d27, #1 @ A1
vmull.p8 q8, d16, d7 @ F = A1*B
vext.8 d4, d7, d7, #1 @ B1
vmull.p8 q2, d27, d4 @ E = A*B1
vext.8 d18, d27, d27, #2 @ A2
vmull.p8 q9, d18, d7 @ H = A2*B
vext.8 d22, d7, d7, #2 @ B2
vmull.p8 q11, d27, d22 @ G = A*B2
vext.8 d20, d27, d27, #3 @ A3
veor q8, q8, q2 @ L = E + F
vmull.p8 q10, d20, d7 @ J = A3*B
vext.8 d4, d7, d7, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q2, d27, d4 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d7, d7, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d27, d22 @ K = A*B4
veor q10, q10, q2 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q2, d27, d7 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q2, q2, q8
veor q2, q2, q10
veor q1,q1,q0 @ Karatsuba post-processing
veor q1,q1,q2
veor d1,d1,d2
veor d4,d4,d3 @ Xh|Xl - 256-bit result
@ equivalent of reduction_avx from ghash-x86_64.pl
vshl.i64 q9,q0,#57 @ 1st phase
vshl.i64 q10,q0,#62
veor q10,q10,q9 @
vshl.i64 q9,q0,#63
veor q10, q10, q9 @
veor d1,d1,d20 @
veor d4,d4,d21
vshr.u64 q10,q0,#1 @ 2nd phase
veor q2,q2,q0
veor q0,q0,q10 @
vshr.u64 q10,q10,#6
vshr.u64 q0,q0,#1 @
veor q0,q0,q2 @
veor q0,q0,q10 @
subs r3,#16
bne Loop_neon
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
sub r0,#16
vst1.64 d1,[r0]! @ write out Xi
vst1.64 d0,[r0]
bx lr @ bx lr
#endif
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM
#endif // defined(__arm__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
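This file and the three NEON ports that follow implement the same entry points: gcm_init_neon stores a "twisted" copy of the hash key H, and gcm_gmult_neon / gcm_ghash_neon multiply the running GHASH state by H in GF(2^128) using Karatsuba-split 8-bit polynomial multiplies. For reference, the field multiplication they accelerate can be written bit-at-a-time as in the GCM specification; the sketch below follows that textbook shift-and-reduce description and is not the code the assembly was generated from.

#include <stdint.h>
#include <string.h>

// Reference GF(2^128) multiply used by GHASH (NIST SP 800-38D, Algorithm 1).
// Bit 0 is the most significant bit of byte 0; the reduction polynomial
// x^128 + x^7 + x^2 + x + 1 appears as the 0xE1 constant loaded by the NEON
// code above. Illustrative only.
static void gf128_mul(uint8_t out[16], const uint8_t x[16], const uint8_t h[16]) {
  uint8_t z[16] = {0};
  uint8_t v[16];
  memcpy(v, h, 16);
  for (int i = 0; i < 128; i++) {
    if ((x[i / 8] >> (7 - (i % 8))) & 1) {
      for (int j = 0; j < 16; j++) {
        z[j] ^= v[j];  // z ^= v when bit i of x is set
      }
    }
    // v = v * "x" in the field: shift right one bit, reduce if a bit fell off.
    int carry = v[15] & 1;
    for (int j = 15; j > 0; j--) {
      v[j] = (uint8_t)((v[j] >> 1) | (v[j - 1] << 7));
    }
    v[0] >>= 1;
    if (carry) {
      v[0] ^= 0xE1;
    }
  }
  memcpy(out, z, 16);
}

// One GHASH step: Xi = (Xi ^ block) * H.
static void ghash_block(uint8_t Xi[16], const uint8_t H[16],
                        const uint8_t block[16]) {
  for (int j = 0; j < 16; j++) {
    Xi[j] ^= block[j];
  }
  gf128_mul(Xi, Xi, H);
}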

View File

@ -0,0 +1,262 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__linux__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
@ instructions are in aesv8-armx.pl.)
.arch armv7-a
.text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
#define ldrplb ldrbpl
#define ldrneb ldrbne
#endif
#if defined(__thumb2__)
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.globl gcm_init_neon
.hidden gcm_init_neon
.type gcm_init_neon,%function
.align 4
gcm_init_neon:
vld1.64 d7,[r1]! @ load H
vmov.i8 q8,#0xe1
vld1.64 d6,[r1]
vshl.i64 d17,#57
vshr.u64 d16,#63 @ t0=0xc2....01
vdup.8 q9,d7[7]
vshr.u64 d26,d6,#63
vshr.s8 q9,#7 @ broadcast carry bit
vshl.i64 q3,q3,#1
vand q8,q8,q9
vorr d7,d26 @ H<<<=1
veor q3,q3,q8 @ twisted H
vstmia r0,{q3}
bx lr @ bx lr
.size gcm_init_neon,.-gcm_init_neon
.globl gcm_gmult_neon
.hidden gcm_gmult_neon
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
vld1.64 d7,[r0]! @ load Xi
vld1.64 d6,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
mov r3,#16
b .Lgmult_neon
.size gcm_gmult_neon,.-gcm_gmult_neon
.globl gcm_ghash_neon
.hidden gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
vld1.64 d1,[r0]! @ load Xi
vld1.64 d0,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
.Loop_neon:
vld1.64 d7,[r2]! @ load inp
vld1.64 d6,[r2]!
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
veor q3,q0 @ inp^=Xi
.Lgmult_neon:
vext.8 d16, d26, d26, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d0, d6, d6, #1 @ B1
vmull.p8 q0, d26, d0 @ E = A*B1
vext.8 d18, d26, d26, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d26, d22 @ G = A*B2
vext.8 d20, d26, d26, #3 @ A3
veor q8, q8, q0 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d0, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q0, d26, d0 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d26, d22 @ K = A*B4
veor q10, q10, q0 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q0, d26, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q0, q0, q8
veor q0, q0, q10
veor d6,d6,d7 @ Karatsuba pre-processing
vext.8 d16, d28, d28, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d2, d6, d6, #1 @ B1
vmull.p8 q1, d28, d2 @ E = A*B1
vext.8 d18, d28, d28, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d28, d22 @ G = A*B2
vext.8 d20, d28, d28, #3 @ A3
veor q8, q8, q1 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d2, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q1, d28, d2 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d28, d22 @ K = A*B4
veor q10, q10, q1 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q1, d28, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q1, q1, q8
veor q1, q1, q10
vext.8 d16, d27, d27, #1 @ A1
vmull.p8 q8, d16, d7 @ F = A1*B
vext.8 d4, d7, d7, #1 @ B1
vmull.p8 q2, d27, d4 @ E = A*B1
vext.8 d18, d27, d27, #2 @ A2
vmull.p8 q9, d18, d7 @ H = A2*B
vext.8 d22, d7, d7, #2 @ B2
vmull.p8 q11, d27, d22 @ G = A*B2
vext.8 d20, d27, d27, #3 @ A3
veor q8, q8, q2 @ L = E + F
vmull.p8 q10, d20, d7 @ J = A3*B
vext.8 d4, d7, d7, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q2, d27, d4 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d7, d7, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d27, d22 @ K = A*B4
veor q10, q10, q2 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q2, d27, d7 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q2, q2, q8
veor q2, q2, q10
veor q1,q1,q0 @ Karatsuba post-processing
veor q1,q1,q2
veor d1,d1,d2
veor d4,d4,d3 @ Xh|Xl - 256-bit result
@ equivalent of reduction_avx from ghash-x86_64.pl
vshl.i64 q9,q0,#57 @ 1st phase
vshl.i64 q10,q0,#62
veor q10,q10,q9 @
vshl.i64 q9,q0,#63
veor q10, q10, q9 @
veor d1,d1,d20 @
veor d4,d4,d21
vshr.u64 q10,q0,#1 @ 2nd phase
veor q2,q2,q0
veor q0,q0,q10 @
vshr.u64 q10,q10,#6
vshr.u64 q0,q0,#1 @
veor q0,q0,q2 @
veor q0,q0,q10 @
subs r3,#16
bne .Loop_neon
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
sub r0,#16
vst1.64 d1,[r0]! @ write out Xi
vst1.64 d0,[r0]
bx lr @ bx lr
.size gcm_ghash_neon,.-gcm_ghash_neon
#endif
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__arm__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@ -0,0 +1,345 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__aarch64__) && defined(__APPLE__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _gcm_init_neon
.private_extern _gcm_init_neon
.align 4
_gcm_init_neon:
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
shl v19.2d, v19.2d, #57 // 0xc2.0
ext v3.16b, v17.16b, v17.16b, #8
ushr v18.2d, v19.2d, #63
dup v17.4s, v17.s[1]
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
ushr v18.2d, v3.2d, #63
sshr v17.4s, v17.4s, #31 // broadcast carry bit
and v18.16b, v18.16b, v16.16b
shl v3.2d, v3.2d, #1
ext v18.16b, v18.16b, v18.16b, #8
and v16.16b, v16.16b, v17.16b
orr v3.16b, v3.16b, v18.16b // H<<<=1
eor v5.16b, v3.16b, v16.16b // twisted H
st1 {v5.2d}, [x0] // store Htable[0]
ret
.globl _gcm_gmult_neon
.private_extern _gcm_gmult_neon
.align 4
_gcm_gmult_neon:
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, Lmasks@PAGE // load constants
add x9, x9, Lmasks@PAGEOFF
ld1 {v24.2d, v25.2d}, [x9]
rev64 v3.16b, v3.16b // byteswap Xi
ext v3.16b, v3.16b, v3.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
mov x3, #16
b Lgmult_neon
.globl _gcm_ghash_neon
.private_extern _gcm_ghash_neon
.align 4
_gcm_ghash_neon:
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, Lmasks@PAGE // load constants
add x9, x9, Lmasks@PAGEOFF
ld1 {v24.2d, v25.2d}, [x9]
rev64 v0.16b, v0.16b // byteswap Xi
ext v0.16b, v0.16b, v0.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
Loop_neon:
ld1 {v3.16b}, [x2], #16 // load inp
rev64 v3.16b, v3.16b // byteswap inp
ext v3.16b, v3.16b, v3.16b, #8
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
Lgmult_neon:
// Split the input into v3 and v4. (The upper halves are unused,
// so it is okay to leave them alone.)
ins v4.d[0], v3.d[1]
ext v16.8b, v5.8b, v5.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v0.8b, v3.8b, v3.8b, #1 // B1
pmull v0.8h, v5.8b, v0.8b // E = A*B1
ext v17.8b, v5.8b, v5.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v5.8b, v19.8b // G = A*B2
ext v18.8b, v5.8b, v5.8b, #3 // A3
eor v16.16b, v16.16b, v0.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v0.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v0.8h, v5.8b, v0.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v0.16b // N = I + J
pmull v19.8h, v5.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v0.8h, v5.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v0.16b, v0.16b, v16.16b
eor v0.16b, v0.16b, v18.16b
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
ext v16.8b, v7.8b, v7.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v1.8b, v3.8b, v3.8b, #1 // B1
pmull v1.8h, v7.8b, v1.8b // E = A*B1
ext v17.8b, v7.8b, v7.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v7.8b, v19.8b // G = A*B2
ext v18.8b, v7.8b, v7.8b, #3 // A3
eor v16.16b, v16.16b, v1.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v1.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v1.8h, v7.8b, v1.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v1.16b // N = I + J
pmull v19.8h, v7.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v1.8h, v7.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v1.16b, v1.16b, v16.16b
eor v1.16b, v1.16b, v18.16b
ext v16.8b, v6.8b, v6.8b, #1 // A1
pmull v16.8h, v16.8b, v4.8b // F = A1*B
ext v2.8b, v4.8b, v4.8b, #1 // B1
pmull v2.8h, v6.8b, v2.8b // E = A*B1
ext v17.8b, v6.8b, v6.8b, #2 // A2
pmull v17.8h, v17.8b, v4.8b // H = A2*B
ext v19.8b, v4.8b, v4.8b, #2 // B2
pmull v19.8h, v6.8b, v19.8b // G = A*B2
ext v18.8b, v6.8b, v6.8b, #3 // A3
eor v16.16b, v16.16b, v2.16b // L = E + F
pmull v18.8h, v18.8b, v4.8b // J = A3*B
ext v2.8b, v4.8b, v4.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v2.8h, v6.8b, v2.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v4.8b, v4.8b, #4 // B4
eor v18.16b, v18.16b, v2.16b // N = I + J
pmull v19.8h, v6.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v2.8h, v6.8b, v4.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v2.16b, v2.16b, v16.16b
eor v2.16b, v2.16b, v18.16b
ext v16.16b, v0.16b, v2.16b, #8
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
eor v1.16b, v1.16b, v2.16b
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
// This is a no-op due to the ins instruction below.
// ins v2.d[0], v1.d[1]
// equivalent of reduction_avx from ghash-x86_64.pl
shl v17.2d, v0.2d, #57 // 1st phase
shl v18.2d, v0.2d, #62
eor v18.16b, v18.16b, v17.16b //
shl v17.2d, v0.2d, #63
eor v18.16b, v18.16b, v17.16b //
// Note Xm contains {Xl.d[1], Xh.d[0]}.
eor v18.16b, v18.16b, v1.16b
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
ushr v18.2d, v0.2d, #1 // 2nd phase
eor v2.16b, v2.16b,v0.16b
eor v0.16b, v0.16b,v18.16b //
ushr v18.2d, v18.2d, #6
ushr v0.2d, v0.2d, #1 //
eor v0.16b, v0.16b, v2.16b //
eor v0.16b, v0.16b, v18.16b //
subs x3, x3, #16
bne Loop_neon
rev64 v0.16b, v0.16b // byteswap Xi and write
ext v0.16b, v0.16b, v0.16b, #8
st1 {v0.16b}, [x0]
ret
.section __TEXT,__const
.align 4
Lmasks:
.quad 0x0000ffffffffffff // k48
.quad 0x00000000ffffffff // k32
.quad 0x000000000000ffff // k16
.quad 0x0000000000000000 // k0
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM
#endif // defined(__aarch64__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@ -0,0 +1,348 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__aarch64__) && defined(__linux__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.globl gcm_init_neon
.hidden gcm_init_neon
.type gcm_init_neon,%function
.align 4
gcm_init_neon:
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
shl v19.2d, v19.2d, #57 // 0xc2.0
ext v3.16b, v17.16b, v17.16b, #8
ushr v18.2d, v19.2d, #63
dup v17.4s, v17.s[1]
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
ushr v18.2d, v3.2d, #63
sshr v17.4s, v17.4s, #31 // broadcast carry bit
and v18.16b, v18.16b, v16.16b
shl v3.2d, v3.2d, #1
ext v18.16b, v18.16b, v18.16b, #8
and v16.16b, v16.16b, v17.16b
orr v3.16b, v3.16b, v18.16b // H<<<=1
eor v5.16b, v3.16b, v16.16b // twisted H
st1 {v5.2d}, [x0] // store Htable[0]
ret
.size gcm_init_neon,.-gcm_init_neon
.globl gcm_gmult_neon
.hidden gcm_gmult_neon
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, .Lmasks // load constants
add x9, x9, :lo12:.Lmasks
ld1 {v24.2d, v25.2d}, [x9]
rev64 v3.16b, v3.16b // byteswap Xi
ext v3.16b, v3.16b, v3.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
mov x3, #16
b .Lgmult_neon
.size gcm_gmult_neon,.-gcm_gmult_neon
.globl gcm_ghash_neon
.hidden gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, .Lmasks // load constants
add x9, x9, :lo12:.Lmasks
ld1 {v24.2d, v25.2d}, [x9]
rev64 v0.16b, v0.16b // byteswap Xi
ext v0.16b, v0.16b, v0.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
.Loop_neon:
ld1 {v3.16b}, [x2], #16 // load inp
rev64 v3.16b, v3.16b // byteswap inp
ext v3.16b, v3.16b, v3.16b, #8
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
.Lgmult_neon:
// Split the input into v3 and v4. (The upper halves are unused,
// so it is okay to leave them alone.)
ins v4.d[0], v3.d[1]
ext v16.8b, v5.8b, v5.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v0.8b, v3.8b, v3.8b, #1 // B1
pmull v0.8h, v5.8b, v0.8b // E = A*B1
ext v17.8b, v5.8b, v5.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v5.8b, v19.8b // G = A*B2
ext v18.8b, v5.8b, v5.8b, #3 // A3
eor v16.16b, v16.16b, v0.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v0.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v0.8h, v5.8b, v0.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v0.16b // N = I + J
pmull v19.8h, v5.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v0.8h, v5.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v0.16b, v0.16b, v16.16b
eor v0.16b, v0.16b, v18.16b
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
ext v16.8b, v7.8b, v7.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v1.8b, v3.8b, v3.8b, #1 // B1
pmull v1.8h, v7.8b, v1.8b // E = A*B1
ext v17.8b, v7.8b, v7.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v7.8b, v19.8b // G = A*B2
ext v18.8b, v7.8b, v7.8b, #3 // A3
eor v16.16b, v16.16b, v1.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v1.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v1.8h, v7.8b, v1.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v1.16b // N = I + J
pmull v19.8h, v7.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v1.8h, v7.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v1.16b, v1.16b, v16.16b
eor v1.16b, v1.16b, v18.16b
ext v16.8b, v6.8b, v6.8b, #1 // A1
pmull v16.8h, v16.8b, v4.8b // F = A1*B
ext v2.8b, v4.8b, v4.8b, #1 // B1
pmull v2.8h, v6.8b, v2.8b // E = A*B1
ext v17.8b, v6.8b, v6.8b, #2 // A2
pmull v17.8h, v17.8b, v4.8b // H = A2*B
ext v19.8b, v4.8b, v4.8b, #2 // B2
pmull v19.8h, v6.8b, v19.8b // G = A*B2
ext v18.8b, v6.8b, v6.8b, #3 // A3
eor v16.16b, v16.16b, v2.16b // L = E + F
pmull v18.8h, v18.8b, v4.8b // J = A3*B
ext v2.8b, v4.8b, v4.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v2.8h, v6.8b, v2.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v4.8b, v4.8b, #4 // B4
eor v18.16b, v18.16b, v2.16b // N = I + J
pmull v19.8h, v6.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v2.8h, v6.8b, v4.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v2.16b, v2.16b, v16.16b
eor v2.16b, v2.16b, v18.16b
ext v16.16b, v0.16b, v2.16b, #8
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
eor v1.16b, v1.16b, v2.16b
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
// This is a no-op due to the ins instruction below.
// ins v2.d[0], v1.d[1]
// equivalent of reduction_avx from ghash-x86_64.pl
shl v17.2d, v0.2d, #57 // 1st phase
shl v18.2d, v0.2d, #62
eor v18.16b, v18.16b, v17.16b //
shl v17.2d, v0.2d, #63
eor v18.16b, v18.16b, v17.16b //
// Note Xm contains {Xl.d[1], Xh.d[0]}.
eor v18.16b, v18.16b, v1.16b
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
ushr v18.2d, v0.2d, #1 // 2nd phase
eor v2.16b, v2.16b,v0.16b
eor v0.16b, v0.16b,v18.16b //
ushr v18.2d, v18.2d, #6
ushr v0.2d, v0.2d, #1 //
eor v0.16b, v0.16b, v2.16b //
eor v0.16b, v0.16b, v18.16b //
subs x3, x3, #16
bne .Loop_neon
rev64 v0.16b, v0.16b // byteswap Xi and write
ext v0.16b, v0.16b, v0.16b, #8
st1 {v0.16b}, [x0]
ret
.size gcm_ghash_neon,.-gcm_ghash_neon
.section .rodata
.align 4
.Lmasks:
.quad 0x0000ffffffffffff // k48
.quad 0x00000000ffffffff // k32
.quad 0x000000000000ffff // k16
.quad 0x0000000000000000 // k0
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__aarch64__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
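The comment block inside gcm_ghash_neon above describes how the routine assembles a wide carry-less product from eight 8-bit pmull partial products, masking the shifted pieces with the k48/k32/k16 constants from .Lmasks before folding them together. The end result is an ordinary carry-less (GF(2)[x]) multiplication. As a point of reference only, here is a minimal scalar sketch of a 64×64→128-bit carry-less multiply in Swift; this helper is illustrative and is not part of the vendored sources or the BigNum package.

/// Illustrative scalar carry-less multiply: XOR together shifted copies of `a`
/// for every set bit of `b`. One pmull lane computes the same thing in hardware.
func clmul64(_ a: UInt64, _ b: UInt64) -> (hi: UInt64, lo: UInt64) {
    var hi: UInt64 = 0
    var lo: UInt64 = 0
    for i in 0..<64 where (b >> UInt64(i)) & 1 == 1 {
        lo ^= a << UInt64(i)                    // add (XOR) the shifted multiplicand
        if i > 0 { hi ^= a >> UInt64(64 - i) }  // bits that spill into the high half
    }
    return (hi: hi, lo: lo)
}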


@@ -0,0 +1,301 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.globl gcm_gmult_ssse3
.hidden gcm_gmult_ssse3
.type gcm_gmult_ssse3,@function
.align 16
gcm_gmult_ssse3:
.L_gcm_gmult_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movdqu (%edi),%xmm0
call .L000pic_point
.L000pic_point:
popl %eax
movdqa .Lreverse_bytes-.L000pic_point(%eax),%xmm7
movdqa .Llow4_mask-.L000pic_point(%eax),%xmm2
.byte 102,15,56,0,199
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L001loop_row_1:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L001loop_row_1
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L002loop_row_2:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L002loop_row_2
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
.L003loop_row_3:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L003loop_row_3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,0,215
movdqu %xmm2,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_gmult_ssse3,.-.L_gcm_gmult_ssse3_begin
.globl gcm_ghash_ssse3
.hidden gcm_ghash_ssse3
.type gcm_ghash_ssse3,@function
.align 16
gcm_ghash_ssse3:
.L_gcm_ghash_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%edx
movl 32(%esp),%ecx
movdqu (%edi),%xmm0
call .L004pic_point
.L004pic_point:
popl %ebx
movdqa .Lreverse_bytes-.L004pic_point(%ebx),%xmm7
andl $-16,%ecx
.byte 102,15,56,0,199
pxor %xmm3,%xmm3
.L005loop_ghash:
movdqa .Llow4_mask-.L004pic_point(%ebx),%xmm2
movdqu (%edx),%xmm1
.byte 102,15,56,0,207
pxor %xmm1,%xmm0
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
movl $5,%eax
.L006loop_row_4:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L006loop_row_4
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L007loop_row_5:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L007loop_row_5
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
.L008loop_row_6:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L008loop_row_6
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,%xmm0
leal -256(%esi),%esi
leal 16(%edx),%edx
subl $16,%ecx
jnz .L005loop_ghash
.byte 102,15,56,0,199
movdqu %xmm0,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_ssse3,.-.L_gcm_ghash_ssse3_begin
.align 16
.Lreverse_bytes:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.align 16
.Llow4_mask:
.long 252645135,252645135,252645135,252645135
#endif
.section .note.GNU-stack,"",@progbits
#endif // defined(__i386__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
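gcm_gmult_ssse3 and gcm_ghash_ssse3 above avoid data-dependent table lookups: each byte of the state is split into its low and high nibble using the .Llow4_mask constant (pand, pandn and psrld $4), and each nibble then selects one of 16 precomputed table bytes with pshufb, so the selection runs in constant time. A minimal Swift sketch of just the nibble split, using a hypothetical helper that is not in the vendored code:

/// Hypothetical helper, for illustration only: split each state byte into its
/// low and high 4-bit halves, mirroring the pand / pandn + psrld $4 sequence.
/// In the assembly each nibble then indexes a 16-byte table row via pshufb.
func splitNibbles(_ state: [UInt8]) -> (low: [UInt8], high: [UInt8]) {
    let mask: UInt8 = 0x0f                       // same value as .Llow4_mask
    let low  = state.map { $0 & mask }           // pand: keep the low 4 bits
    let high = state.map { ($0 & ~mask) >> 4 }   // pandn + psrld: high 4 bits, shifted down
    return (low: low, high: high)
}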


@@ -0,0 +1,434 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.type gcm_gmult_ssse3, @function
.globl gcm_gmult_ssse3
.hidden gcm_gmult_ssse3
.align 16
gcm_gmult_ssse3:
.cfi_startproc
.Lgmult_seh_begin:
movdqu (%rdi),%xmm0
movdqa .Lreverse_bytes(%rip),%xmm10
movdqa .Llow4_mask(%rip),%xmm2
.byte 102,65,15,56,0,194
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
.Loop_row_1:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_1
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
.Loop_row_2:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_2
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $6,%rax
.Loop_row_3:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
.byte 102,65,15,56,0,210
movdqu %xmm2,(%rdi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
.Lgmult_seh_end:
.cfi_endproc
.size gcm_gmult_ssse3,.-gcm_gmult_ssse3
.type gcm_ghash_ssse3, @function
.globl gcm_ghash_ssse3
.hidden gcm_ghash_ssse3
.align 16
gcm_ghash_ssse3:
.Lghash_seh_begin:
.cfi_startproc
movdqu (%rdi),%xmm0
movdqa .Lreverse_bytes(%rip),%xmm10
movdqa .Llow4_mask(%rip),%xmm11
andq $-16,%rcx
.byte 102,65,15,56,0,194
pxor %xmm3,%xmm3
.Loop_ghash:
movdqu (%rdx),%xmm1
.byte 102,65,15,56,0,202
pxor %xmm1,%xmm0
movdqa %xmm11,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm11,%xmm0
pxor %xmm2,%xmm2
movq $5,%rax
.Loop_row_4:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_4
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
.Loop_row_5:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_5
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $6,%rax
.Loop_row_6:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_6
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,%xmm0
leaq -256(%rsi),%rsi
leaq 16(%rdx),%rdx
subq $16,%rcx
jnz .Loop_ghash
.byte 102,65,15,56,0,194
movdqu %xmm0,(%rdi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
.Lghash_seh_end:
.cfi_endproc
.size gcm_ghash_ssse3,.-gcm_ghash_ssse3
.align 16
.Lreverse_bytes:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.Llow4_mask:
.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
#endif
.section .note.GNU-stack,"",@progbits
#endif // defined(__x86_64__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif


@@ -0,0 +1,433 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _gcm_gmult_ssse3
.private_extern _gcm_gmult_ssse3
.p2align 4
_gcm_gmult_ssse3:
L$gmult_seh_begin:
movdqu (%rdi),%xmm0
movdqa L$reverse_bytes(%rip),%xmm10
movdqa L$low4_mask(%rip),%xmm2
.byte 102,65,15,56,0,194
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
L$oop_row_1:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz L$oop_row_1
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
L$oop_row_2:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz L$oop_row_2
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $6,%rax
L$oop_row_3:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz L$oop_row_3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
.byte 102,65,15,56,0,210
movdqu %xmm2,(%rdi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
L$gmult_seh_end:
.globl _gcm_ghash_ssse3
.private_extern _gcm_ghash_ssse3
.p2align 4
_gcm_ghash_ssse3:
L$ghash_seh_begin:
movdqu (%rdi),%xmm0
movdqa L$reverse_bytes(%rip),%xmm10
movdqa L$low4_mask(%rip),%xmm11
andq $-16,%rcx
.byte 102,65,15,56,0,194
pxor %xmm3,%xmm3
L$oop_ghash:
movdqu (%rdx),%xmm1
.byte 102,65,15,56,0,202
pxor %xmm1,%xmm0
movdqa %xmm11,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm11,%xmm0
pxor %xmm2,%xmm2
movq $5,%rax
L$oop_row_4:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz L$oop_row_4
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
L$oop_row_5:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz L$oop_row_5
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $6,%rax
L$oop_row_6:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz L$oop_row_6
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,%xmm0
leaq -256(%rsi),%rsi
leaq 16(%rdx),%rdx
subq $16,%rcx
jnz L$oop_ghash
.byte 102,65,15,56,0,194
movdqu %xmm0,(%rdi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
L$ghash_seh_end:
.p2align 4
L$reverse_bytes:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
L$low4_mask:
.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
#endif
#endif // defined(__x86_64__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif


@@ -0,0 +1,337 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
call .L000pic
.L000pic:
popl %ecx
leal .Lbswap-.L000pic(%ecx),%ecx
movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
psrlq $63,%xmm3
pcmpgtd %xmm4,%xmm5
pslldq $8,%xmm3
por %xmm3,%xmm2
pand 16(%ecx),%xmm5
pxor %xmm5,%xmm2
movdqa %xmm2,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,(%edx)
pxor %xmm0,%xmm4
movdqu %xmm0,16(%edx)
.byte 102,15,58,15,227,8
movdqu %xmm4,32(%edx)
ret
.size gcm_init_clmul,.-.L_gcm_init_clmul_begin
.globl gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax
movl 8(%esp),%edx
call .L001pic
.L001pic:
popl %ecx
leal .Lbswap-.L001pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
.byte 102,15,56,0,197
movups 32(%edx),%xmm4
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
ret
.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
.globl gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebx
call .L002pic
.L002pic:
popl %ecx
leal .Lbswap-.L002pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2
.byte 102,15,56,0,197
subl $16,%ebx
jz .L003odd_tail
movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
.byte 102,15,56,0,245
movdqu 32(%edx),%xmm5
pxor %xmm3,%xmm0
pshufd $78,%xmm6,%xmm3
movdqa %xmm6,%xmm7
pxor %xmm6,%xmm3
leal 32(%esi),%esi
.byte 102,15,58,68,242,0
.byte 102,15,58,68,250,17
.byte 102,15,58,68,221,0
movups 16(%edx),%xmm2
nop
subl $32,%ebx
jbe .L004even_tail
jmp .L005mod_loop
.align 32
.L005mod_loop:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
nop
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movups (%edx),%xmm2
xorps %xmm6,%xmm0
movdqa (%ecx),%xmm5
xorps %xmm7,%xmm1
movdqu (%esi),%xmm7
pxor %xmm0,%xmm3
movdqu 16(%esi),%xmm6
pxor %xmm1,%xmm3
.byte 102,15,56,0,253
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
.byte 102,15,56,0,245
pxor %xmm7,%xmm1
movdqa %xmm6,%xmm7
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0
movups 32(%edx),%xmm5
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
pshufd $78,%xmm7,%xmm3
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm7,%xmm3
pxor %xmm4,%xmm1
.byte 102,15,58,68,250,17
movups 16(%edx),%xmm2
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0
leal 32(%esi),%esi
subl $32,%ebx
ja .L005mod_loop
.L004even_tail:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movdqa (%ecx),%xmm5
xorps %xmm6,%xmm0
xorps %xmm7,%xmm1
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz .L006done
movups (%edx),%xmm2
.L003odd_tail:
movdqu (%esi),%xmm3
.byte 102,15,56,0,221
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.L006done:
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
.align 64
.Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif
.section .note.GNU-stack,"",@progbits
#endif // defined(__i386__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
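The clmul routines above use the Karatsuba arrangement their comments name: three pclmulqdq multiplies (low halves, high halves, and the XOR of the halves) replace four, and the middle term is recovered by XORing the other two products back in. Below is a self-contained Swift sketch of that layout, illustrative only; the scalar clmul helper is repeated here so the sketch stands on its own, and nothing in it is part of the vendored sources.

/// Scalar stand-in for one pclmulqdq instruction.
func clmul(_ a: UInt64, _ b: UInt64) -> (hi: UInt64, lo: UInt64) {
    var hi: UInt64 = 0
    var lo: UInt64 = 0
    for i in 0..<64 where (b >> UInt64(i)) & 1 == 1 {
        lo ^= a << UInt64(i)
        if i > 0 { hi ^= a >> UInt64(64 - i) }
    }
    return (hi: hi, lo: lo)
}

/// Karatsuba split of a 128×128-bit carry-less multiply into three products.
/// The final shift-and-XOR recombination (the pslldq/psrldq sequence in the
/// assembly) is left out; only the three partial products are shown.
func karatsuba128(aHi: UInt64, aLo: UInt64, bHi: UInt64, bLo: UInt64)
    -> (high: (hi: UInt64, lo: UInt64),
        mid: (hi: UInt64, lo: UInt64),
        low: (hi: UInt64, lo: UInt64)) {
    let low  = clmul(aLo, bLo)               // pclmulqdq $0x00: low halves
    let high = clmul(aHi, bHi)               // pclmulqdq $0x11: high halves
    var mid  = clmul(aLo ^ aHi, bLo ^ bHi)   // multiply of the XORed halves
    mid.lo ^= low.lo ^ high.lo               // fold back: mid = aLo*bHi XOR aHi*bLo
    mid.hi ^= low.hi ^ high.hi
    return (high: high, mid: mid, low: low)
}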

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,263 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__APPLE__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
.text
.code 32
#undef __thumb2__
.globl _gcm_init_v8
.private_extern _gcm_init_v8
#ifdef __thumb2__
.thumb_func _gcm_init_v8
#endif
.align 4
_gcm_init_v8:
vld1.64 {q9},[r1] @ load input H
vmov.i8 q11,#0xe1
vshl.i64 q11,q11,#57 @ 0xc2.0
vext.8 q3,q9,q9,#8
vshr.u64 q10,q11,#63
vdup.32 q9,d18[1]
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
vshr.u64 q10,q3,#63
vshr.s32 q9,q9,#31 @ broadcast carry bit
vand q10,q10,q8
vshl.i64 q3,q3,#1
vext.8 q10,q10,q10,#8
vand q8,q8,q9
vorr q3,q3,q10 @ H<<<=1
veor q12,q3,q8 @ twisted H
vst1.64 {q12},[r0]! @ store Htable[0]
@ calculate H^2
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
veor q8,q8,q12
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q14,q0,q10
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
bx lr
.globl _gcm_gmult_v8
.private_extern _gcm_gmult_v8
#ifdef __thumb2__
.thumb_func _gcm_gmult_v8
#endif
.align 4
_gcm_gmult_v8:
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
vshl.u64 q11,q11,#57
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q3,q9,q9,#8
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
bx lr
.globl _gcm_ghash_v8
.private_extern _gcm_ghash_v8
#ifdef __thumb2__
.thumb_func _gcm_ghash_v8
#endif
.align 4
_gcm_ghash_v8:
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
vld1.64 {q0},[r0] @ load [rotated] Xi
@ "[rotated]" means that
@ loaded value would have
@ to be rotated in order to
@ make it appear as in
@ algorithm specification
subs r3,r3,#32 @ see if r3 is 32 or larger
mov r12,#16 @ r12 is used as post-
@ increment for input pointer;
@ as loop is modulo-scheduled
@ r12 is zeroed just in time
@ to preclude overstepping
@ inp[len], which means that
@ last block[s] are actually
@ loaded twice, but last
@ copy is not processed
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
vmov.i8 q11,#0xe1
vld1.64 {q14},[r1]
moveq r12,#0 @ is it time to zero r12?
vext.8 q0,q0,q0,#8 @ rotate Xi
vld1.64 {q8},[r2]! @ load [rotated] I[0]
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
#ifndef __ARMEB__
vrev64.8 q8,q8
vrev64.8 q0,q0
#endif
vext.8 q3,q8,q8,#8 @ rotate I[0]
blo Lodd_tail_v8 @ r3 was less than 32
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q7,q9,q9,#8
veor q3,q3,q0 @ I[i]^=Xi
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q9,q9,q7 @ Karatsuba pre-processing
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
b Loop_mod2x_v8
.align 4
Loop_mod2x_v8:
vext.8 q10,q3,q3,#8
subs r3,r3,#32 @ is there more data?
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
movlo r12,#0 @ is it time to zero r12?
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
veor q10,q10,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
veor q0,q0,q4 @ accumulate
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
veor q2,q2,q6
moveq r12,#0 @ is it time to zero r12?
veor q1,q1,q5
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
#ifndef __ARMEB__
vrev64.8 q8,q8
#endif
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
vext.8 q7,q9,q9,#8
vext.8 q3,q8,q8,#8
veor q0,q1,q10
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q3,q3,q2 @ accumulate q3 early
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q3,q3,q10
veor q9,q9,q7 @ Karatsuba pre-processing
veor q3,q3,q0
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
bhs Loop_mod2x_v8 @ there was at least 32 more bytes
veor q2,q2,q10
vext.8 q3,q8,q8,#8 @ re-construct q3
adds r3,r3,#32 @ re-construct r3
veor q0,q0,q2 @ re-construct q0
beq Ldone_v8 @ is r3 zero?
Lodd_tail_v8:
vext.8 q10,q0,q0,#8
veor q3,q3,q0 @ inp^=Xi
veor q9,q8,q10 @ q9 is rotated inp^Xi
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
Ldone_v8:
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
bx lr
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM
#endif // defined(__arm__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif


@@ -0,0 +1,260 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__linux__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
.text
.fpu neon
.code 32
#undef __thumb2__
.globl gcm_init_v8
.hidden gcm_init_v8
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
vld1.64 {q9},[r1] @ load input H
vmov.i8 q11,#0xe1
vshl.i64 q11,q11,#57 @ 0xc2.0
vext.8 q3,q9,q9,#8
vshr.u64 q10,q11,#63
vdup.32 q9,d18[1]
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
vshr.u64 q10,q3,#63
vshr.s32 q9,q9,#31 @ broadcast carry bit
vand q10,q10,q8
vshl.i64 q3,q3,#1
vext.8 q10,q10,q10,#8
vand q8,q8,q9
vorr q3,q3,q10 @ H<<<=1
veor q12,q3,q8 @ twisted H
vst1.64 {q12},[r0]! @ store Htable[0]
@ calculate H^2
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
veor q8,q8,q12
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q14,q0,q10
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
bx lr
.size gcm_init_v8,.-gcm_init_v8
.globl gcm_gmult_v8
.hidden gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
vshl.u64 q11,q11,#57
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q3,q9,q9,#8
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
bx lr
.size gcm_gmult_v8,.-gcm_gmult_v8
.globl gcm_ghash_v8
.hidden gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
vld1.64 {q0},[r0] @ load [rotated] Xi
@ "[rotated]" means that
@ loaded value would have
@ to be rotated in order to
@ make it appear as in
@ algorithm specification
subs r3,r3,#32 @ see if r3 is 32 or larger
mov r12,#16 @ r12 is used as post-
@ increment for input pointer;
@ as loop is modulo-scheduled
@ r12 is zeroed just in time
@ to preclude overstepping
@ inp[len], which means that
@ last block[s] are actually
@ loaded twice, but last
@ copy is not processed
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
vmov.i8 q11,#0xe1
vld1.64 {q14},[r1]
moveq r12,#0 @ is it time to zero r12?
vext.8 q0,q0,q0,#8 @ rotate Xi
vld1.64 {q8},[r2]! @ load [rotated] I[0]
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
#ifndef __ARMEB__
vrev64.8 q8,q8
vrev64.8 q0,q0
#endif
vext.8 q3,q8,q8,#8 @ rotate I[0]
blo .Lodd_tail_v8 @ r3 was less than 32
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q7,q9,q9,#8
veor q3,q3,q0 @ I[i]^=Xi
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q9,q9,q7 @ Karatsuba pre-processing
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
b .Loop_mod2x_v8
.align 4
.Loop_mod2x_v8:
vext.8 q10,q3,q3,#8
subs r3,r3,#32 @ is there more data?
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
movlo r12,#0 @ is it time to zero r12?
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
veor q10,q10,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
veor q0,q0,q4 @ accumulate
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
veor q2,q2,q6
moveq r12,#0 @ is it time to zero r12?
veor q1,q1,q5
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
#ifndef __ARMEB__
vrev64.8 q8,q8
#endif
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
vext.8 q7,q9,q9,#8
vext.8 q3,q8,q8,#8
veor q0,q1,q10
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q3,q3,q2 @ accumulate q3 early
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q3,q3,q10
veor q9,q9,q7 @ Karatsuba pre-processing
veor q3,q3,q0
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
bhs .Loop_mod2x_v8 @ there was at least 32 more bytes
veor q2,q2,q10
vext.8 q3,q8,q8,#8 @ re-construct q3
adds r3,r3,#32 @ re-construct r3
veor q0,q0,q2 @ re-construct q0
beq .Ldone_v8 @ is r3 zero?
.Lodd_tail_v8:
vext.8 q10,q0,q0,#8
veor q3,q3,q0 @ inp^=Xi
veor q9,q8,q10 @ q9 is rotated inp^Xi
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
.Ldone_v8:
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
bx lr
.size gcm_ghash_v8,.-gcm_ghash_v8
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__arm__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif


@@ -0,0 +1,253 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__aarch64__) && defined(__APPLE__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
.text
.globl _gcm_init_v8
.private_extern _gcm_init_v8
.align 4
_gcm_init_v8:
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
ext v3.16b,v17.16b,v17.16b,#8
ushr v18.2d,v19.2d,#63
dup v17.4s,v17.s[1]
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
ushr v18.2d,v3.2d,#63
sshr v17.4s,v17.4s,#31 //broadcast carry bit
and v18.16b,v18.16b,v16.16b
shl v3.2d,v3.2d,#1
ext v18.16b,v18.16b,v18.16b,#8
and v16.16b,v16.16b,v17.16b
orr v3.16b,v3.16b,v18.16b //H<<<=1
eor v20.16b,v3.16b,v16.16b //twisted H
st1 {v20.2d},[x0],#16 //store Htable[0]
//calculate H^2
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
pmull v0.1q,v20.1d,v20.1d
eor v16.16b,v16.16b,v20.16b
pmull2 v2.1q,v20.2d,v20.2d
pmull v1.1q,v16.1d,v16.1d
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v22.16b,v0.16b,v18.16b
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
ret
.globl _gcm_gmult_v8
.private_extern _gcm_gmult_v8
.align 4
_gcm_gmult_v8:
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
shl v19.2d,v19.2d,#57
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v3.16b,v17.16b,v17.16b,#8
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.globl _gcm_ghash_v8
.private_extern _gcm_ghash_v8
.align 4
_gcm_ghash_v8:
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
//to be rotated in order to
//make it appear as in
//algorithm specification
subs x3,x3,#32 //see if x3 is 32 or larger
mov x12,#16 //x12 is used as post-
//increment for input pointer;
//as loop is modulo-scheduled
//x12 is zeroed just in time
//to preclude overstepping
//inp[len], which means that
//last block[s] are actually
//loaded twice, but last
//copy is not processed
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v22.2d},[x1]
csel x12,xzr,x12,eq //is it time to zero x12?
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
rev64 v0.16b,v0.16b
#endif
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
b.lo Lodd_tail_v8 //x3 was less than 32
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v7.16b,v17.16b,v17.16b,#8
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
pmull2 v6.1q,v20.2d,v7.2d
b Loop_mod2x_v8
.align 4
Loop_mod2x_v8:
ext v18.16b,v3.16b,v3.16b,#8
subs x3,x3,#32 //is there more data?
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
csel x12,xzr,x12,lo //is it time to zero x12?
pmull v5.1q,v21.1d,v17.1d
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
eor v0.16b,v0.16b,v4.16b //accumulate
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
eor v2.16b,v2.16b,v6.16b
csel x12,xzr,x12,eq //is it time to zero x12?
eor v1.16b,v1.16b,v5.16b
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
#endif
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v7.16b,v17.16b,v17.16b,#8
ext v3.16b,v16.16b,v16.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v3.16b,v3.16b,v18.16b
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
eor v3.16b,v3.16b,v0.16b
pmull2 v6.1q,v20.2d,v7.2d
b.hs Loop_mod2x_v8 //there was at least 32 more bytes
eor v2.16b,v2.16b,v18.16b
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
adds x3,x3,#32 //re-construct x3
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
b.eq Ldone_v8 //is x3 zero?
Lodd_tail_v8:
ext v18.16b,v0.16b,v0.16b,#8
eor v3.16b,v3.16b,v0.16b //inp^=Xi
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
Ldone_v8:
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM
#endif // defined(__aarch64__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif


@@ -0,0 +1,256 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__aarch64__) && defined(__linux__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
.text
.arch armv8-a+crypto
.globl gcm_init_v8
.hidden gcm_init_v8
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
ext v3.16b,v17.16b,v17.16b,#8
ushr v18.2d,v19.2d,#63
dup v17.4s,v17.s[1]
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
ushr v18.2d,v3.2d,#63
sshr v17.4s,v17.4s,#31 //broadcast carry bit
and v18.16b,v18.16b,v16.16b
shl v3.2d,v3.2d,#1
ext v18.16b,v18.16b,v18.16b,#8
and v16.16b,v16.16b,v17.16b
orr v3.16b,v3.16b,v18.16b //H<<<=1
eor v20.16b,v3.16b,v16.16b //twisted H
st1 {v20.2d},[x0],#16 //store Htable[0]
//calculate H^2
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
pmull v0.1q,v20.1d,v20.1d
eor v16.16b,v16.16b,v20.16b
pmull2 v2.1q,v20.2d,v20.2d
pmull v1.1q,v16.1d,v16.1d
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v22.16b,v0.16b,v18.16b
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
ret
.size gcm_init_v8,.-gcm_init_v8
.globl gcm_gmult_v8
.hidden gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
shl v19.2d,v19.2d,#57
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v3.16b,v17.16b,v17.16b,#8
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.size gcm_gmult_v8,.-gcm_gmult_v8
.globl gcm_ghash_v8
.hidden gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
//to be rotated in order to
//make it appear as in
//algorithm specification
subs x3,x3,#32 //see if x3 is 32 or larger
mov x12,#16 //x12 is used as post-
//increment for input pointer;
//as loop is modulo-scheduled
//x12 is zeroed just in time
//to preclude overstepping
//inp[len], which means that
//last block[s] are actually
//loaded twice, but last
//copy is not processed
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v22.2d},[x1]
csel x12,xzr,x12,eq //is it time to zero x12?
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
rev64 v0.16b,v0.16b
#endif
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
b.lo .Lodd_tail_v8 //x3 was less than 32
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v7.16b,v17.16b,v17.16b,#8
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
pmull2 v6.1q,v20.2d,v7.2d
b .Loop_mod2x_v8
.align 4
.Loop_mod2x_v8:
ext v18.16b,v3.16b,v3.16b,#8
subs x3,x3,#32 //is there more data?
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
csel x12,xzr,x12,lo //is it time to zero x12?
pmull v5.1q,v21.1d,v17.1d
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
eor v0.16b,v0.16b,v4.16b //accumulate
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
eor v2.16b,v2.16b,v6.16b
csel x12,xzr,x12,eq //is it time to zero x12?
eor v1.16b,v1.16b,v5.16b
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
#endif
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v7.16b,v17.16b,v17.16b,#8
ext v3.16b,v16.16b,v16.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v3.16b,v3.16b,v18.16b
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
eor v3.16b,v3.16b,v0.16b
pmull2 v6.1q,v20.2d,v7.2d
b.hs .Loop_mod2x_v8 //there was at least 32 more bytes
eor v2.16b,v2.16b,v18.16b
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
adds x3,x3,#32 //re-construct x3
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
b.eq .Ldone_v8 //is x3 zero?
.Lodd_tail_v8:
ext v18.16b,v0.16b,v0.16b,#8
eor v3.16b,v3.16b,v0.16b //inp^=Xi
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
.Ldone_v8:
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.size gcm_ghash_v8,.-gcm_ghash_v8
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__aarch64__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
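Throughout the v8 routines above, Xi and each input block are wrapped in rev64 plus ext #8 on little-endian builds (the #ifndef __ARMEB__ blocks): the 16-byte GHASH state is stored big-endian in memory, while the field arithmetic works on two native 64-bit halves, so the bytes are reversed on load and reversed back on store. A rough scalar equivalent in Swift, illustrative only and not part of the package:

/// Illustrative only: read a 16-byte big-endian GHASH state as two 64-bit
/// halves, the same reordering that rev64 + ext #8 perform on the SIMD register.
func loadXi(_ bytes: [UInt8]) -> (hi: UInt64, lo: UInt64) {
    precondition(bytes.count == 16)
    func be64(_ slice: ArraySlice<UInt8>) -> UInt64 {
        slice.reduce(UInt64(0)) { ($0 << 8) | UInt64($1) }   // big-endian bytes -> UInt64
    }
    return (hi: be64(bytes[0..<8]), lo: be64(bytes[8..<16]))
}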


@@ -0,0 +1,695 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.globl md5_block_asm_data_order
.hidden md5_block_asm_data_order
.type md5_block_asm_data_order,@function
.align 16
md5_block_asm_data_order:
.L_md5_block_asm_data_order_begin:
pushl %esi
pushl %edi
movl 12(%esp),%edi
movl 16(%esp),%esi
movl 20(%esp),%ecx
pushl %ebp
shll $6,%ecx
pushl %ebx
addl %esi,%ecx
subl $64,%ecx
movl (%edi),%eax
pushl %ecx
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
.L000start:
movl %ecx,%edi
movl (%esi),%ebp
xorl %edx,%edi
andl %ebx,%edi
leal 3614090360(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 4(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 3905402710(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 8(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 606105819(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 12(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 3250441966(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 16(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 4118548399(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 20(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 1200080426(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 24(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 2821735955(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 28(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 4249261313(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 32(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 1770035416(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 36(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 2336552879(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 40(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 4294925233(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 44(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 2304563134(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 48(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 1804603682(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 52(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 4254626195(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 56(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 2792965006(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 60(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 1236535329(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 4(%esi),%ebp
addl %ecx,%ebx
leal 4129170786(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 24(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 3225465664(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 44(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 643717713(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl (%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 3921069994(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 3593408605(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 40(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 38016083(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 60(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 3634488961(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 16(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 3889429448(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 36(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 568446438(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 56(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 3275163606(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 12(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 4107603335(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 32(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 1163531501(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 52(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 2850285829(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 8(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 4243563512(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 28(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 1735328473(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 48(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 2368359562(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 4294588738(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 32(%esi),%ebp
movl %ebx,%edi
leal 2272392833(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 44(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 1839030562(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 56(%esi),%ebp
movl %edx,%edi
leal 4259657740(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 4(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 2763975236(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 16(%esi),%ebp
movl %ebx,%edi
leal 1272893353(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 28(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 4139469664(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 40(%esi),%ebp
movl %edx,%edi
leal 3200236656(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 52(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 681279174(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl (%esi),%ebp
movl %ebx,%edi
leal 3936430074(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 12(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 3572445317(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 24(%esi),%ebp
movl %edx,%edi
leal 76029189(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 36(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 3654602809(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 48(%esi),%ebp
movl %ebx,%edi
leal 3873151461(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 60(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 530742520(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 8(%esi),%ebp
movl %edx,%edi
leal 3299628645(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl (%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
orl %ebx,%edi
leal 4096336452(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 28(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 1126891415(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 56(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 2878612391(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 20(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 4237533241(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 48(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 1700485571(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 12(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 2399980690(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 40(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 4293915773(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 4(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 2240044497(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 32(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 1873313359(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 60(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 4264355552(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 24(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 2734768916(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 52(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 1309151649(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 16(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 4149444226(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 44(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 3174756917(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 8(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 718787259(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 36(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 3951481745(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 24(%esp),%ebp
addl %edi,%ebx
addl $64,%esi
roll $21,%ebx
movl (%ebp),%edi
addl %ecx,%ebx
addl %edi,%eax
movl 4(%ebp),%edi
addl %edi,%ebx
movl 8(%ebp),%edi
addl %edi,%ecx
movl 12(%ebp),%edi
addl %edi,%edx
movl %eax,(%ebp)
movl %ebx,4(%ebp)
movl (%esp),%edi
movl %ecx,8(%ebp)
movl %edx,12(%ebp)
cmpl %esi,%edi
jae .L000start
popl %eax
popl %ebx
popl %ebp
popl %edi
popl %esi
ret
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
#endif
.section .note.GNU-stack,"",@progbits
#endif // defined(__i386__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@ -0,0 +1,709 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.align 16
.globl md5_block_asm_data_order
.hidden md5_block_asm_data_order
.type md5_block_asm_data_order,@function
md5_block_asm_data_order:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12,-32
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14,-40
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset r15,-48
.Lprologue:
movq %rdi,%rbp
shlq $6,%rdx
leaq (%rsi,%rdx,1),%rdi
movl 0(%rbp),%eax
movl 4(%rbp),%ebx
movl 8(%rbp),%ecx
movl 12(%rbp),%edx
cmpq %rdi,%rsi
je .Lend
.Lloop:
movl %eax,%r8d
movl %ebx,%r9d
movl %ecx,%r14d
movl %edx,%r15d
movl 0(%rsi),%r10d
movl %edx,%r11d
xorl %ecx,%r11d
leal -680876936(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 4(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -389564586(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 8(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal 606105819(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 12(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -1044525330(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 16(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal -176418897(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 20(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal 1200080426(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 24(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -1473231341(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 28(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -45705983(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 32(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal 1770035416(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 36(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -1958414417(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 40(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -42063(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 44(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -1990404162(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 48(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal 1804603682(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 52(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -40341101(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 56(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -1502002290(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 60(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal 1236535329(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 0(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
movl 4(%rsi),%r10d
movl %edx,%r11d
movl %edx,%r12d
notl %r11d
leal -165796510(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 24(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -1069501632(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 44(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal 643717713(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 0(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -373897302(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 20(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal -701558691(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 40(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal 38016083(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 60(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal -660478335(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 16(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -405537848(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 36(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal 568446438(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 56(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -1019803690(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 12(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal -187363961(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 32(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal 1163531501(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 52(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal -1444681467(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 8(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -51403784(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 28(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal 1735328473(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 48(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -1926607734(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 0(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
movl 20(%rsi),%r10d
movl %ecx,%r11d
leal -378558(%rax,%r10,1),%eax
movl 32(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -2022574463(%rdx,%r10,1),%edx
movl 44(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 1839030562(%rcx,%r10,1),%ecx
movl 56(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -35309556(%rbx,%r10,1),%ebx
movl 4(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -1530992060(%rax,%r10,1),%eax
movl 16(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal 1272893353(%rdx,%r10,1),%edx
movl 28(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -155497632(%rcx,%r10,1),%ecx
movl 40(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -1094730640(%rbx,%r10,1),%ebx
movl 52(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal 681279174(%rax,%r10,1),%eax
movl 0(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -358537222(%rdx,%r10,1),%edx
movl 12(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -722521979(%rcx,%r10,1),%ecx
movl 24(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal 76029189(%rbx,%r10,1),%ebx
movl 36(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -640364487(%rax,%r10,1),%eax
movl 48(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -421815835(%rdx,%r10,1),%edx
movl 60(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 530742520(%rcx,%r10,1),%ecx
movl 8(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -995338651(%rbx,%r10,1),%ebx
movl 0(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
movl 0(%rsi),%r10d
movl $0xffffffff,%r11d
xorl %edx,%r11d
leal -198630844(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 28(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal 1126891415(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 56(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1416354905(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 20(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -57434055(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 48(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1700485571(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 12(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1894986606(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 40(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1051523(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 4(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -2054922799(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 32(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1873313359(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 60(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -30611744(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 24(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1560198380(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 52(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal 1309151649(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 16(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal -145523070(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 44(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1120210379(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 8(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal 718787259(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 36(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -343485551(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 0(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
addl %r8d,%eax
addl %r9d,%ebx
addl %r14d,%ecx
addl %r15d,%edx
addq $64,%rsi
cmpq %rdi,%rsi
jb .Lloop
.Lend:
movl %eax,0(%rbp)
movl %ebx,4(%rbp)
movl %ecx,8(%rbp)
movl %edx,12(%rbp)
movq (%rsp),%r15
.cfi_restore r15
movq 8(%rsp),%r14
.cfi_restore r14
movq 16(%rsp),%r12
.cfi_restore r12
movq 24(%rsp),%rbx
.cfi_restore rbx
movq 32(%rsp),%rbp
.cfi_restore rbp
addq $40,%rsp
.cfi_adjust_cfa_offset -40
.Lepilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size md5_block_asm_data_order,.-md5_block_asm_data_order
#endif
.section .note.GNU-stack,"",@progbits
#endif // defined(__x86_64__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
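For readers skimming the assembly above: the prologue of the x86-64 routine
(movq %rdi,%rbp / shlq $6,%rdx / leaq (%rsi,%rdx,1),%rdi) implies the System V
calling convention sketched below. The C prototype and argument names are an
assumption inferred from that register usage, not something declared in this
commit.

#include <stdint.h>
#include <stddef.h>

/* Hypothetical C-level view of the symbol defined above:
 *   %rdi -> the four 32-bit MD5 state words (loaded from 0/4/8/12(%rbp)),
 *   %rsi -> input bytes,
 *   %rdx -> number of 64-byte blocks (shlq $6 turns it into a byte length
 *           that is added to %rsi to form the end-of-input pointer). */
void md5_block_asm_data_order(uint32_t state[4], const uint8_t *data,
                              size_t num_blocks);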

View File

@ -0,0 +1,703 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.p2align 4
.globl _md5_block_asm_data_order
.private_extern _md5_block_asm_data_order
_md5_block_asm_data_order:
pushq %rbp
pushq %rbx
pushq %r12
pushq %r14
pushq %r15
L$prologue:
movq %rdi,%rbp
shlq $6,%rdx
leaq (%rsi,%rdx,1),%rdi
movl 0(%rbp),%eax
movl 4(%rbp),%ebx
movl 8(%rbp),%ecx
movl 12(%rbp),%edx
cmpq %rdi,%rsi
je L$end
L$loop:
movl %eax,%r8d
movl %ebx,%r9d
movl %ecx,%r14d
movl %edx,%r15d
movl 0(%rsi),%r10d
movl %edx,%r11d
xorl %ecx,%r11d
leal -680876936(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 4(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -389564586(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 8(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal 606105819(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 12(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -1044525330(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 16(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal -176418897(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 20(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal 1200080426(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 24(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -1473231341(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 28(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -45705983(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 32(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal 1770035416(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 36(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -1958414417(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 40(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -42063(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 44(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -1990404162(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 48(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal 1804603682(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 52(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -40341101(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 56(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -1502002290(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 60(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal 1236535329(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 0(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
movl 4(%rsi),%r10d
movl %edx,%r11d
movl %edx,%r12d
notl %r11d
leal -165796510(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 24(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -1069501632(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 44(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal 643717713(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 0(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -373897302(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 20(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal -701558691(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 40(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal 38016083(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 60(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal -660478335(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 16(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -405537848(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 36(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal 568446438(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 56(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -1019803690(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 12(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal -187363961(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 32(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal 1163531501(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 52(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal -1444681467(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 8(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -51403784(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 28(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal 1735328473(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 48(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -1926607734(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 0(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
movl 20(%rsi),%r10d
movl %ecx,%r11d
leal -378558(%rax,%r10,1),%eax
movl 32(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -2022574463(%rdx,%r10,1),%edx
movl 44(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 1839030562(%rcx,%r10,1),%ecx
movl 56(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -35309556(%rbx,%r10,1),%ebx
movl 4(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -1530992060(%rax,%r10,1),%eax
movl 16(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal 1272893353(%rdx,%r10,1),%edx
movl 28(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -155497632(%rcx,%r10,1),%ecx
movl 40(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -1094730640(%rbx,%r10,1),%ebx
movl 52(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal 681279174(%rax,%r10,1),%eax
movl 0(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -358537222(%rdx,%r10,1),%edx
movl 12(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -722521979(%rcx,%r10,1),%ecx
movl 24(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal 76029189(%rbx,%r10,1),%ebx
movl 36(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -640364487(%rax,%r10,1),%eax
movl 48(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -421815835(%rdx,%r10,1),%edx
movl 60(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 530742520(%rcx,%r10,1),%ecx
movl 8(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -995338651(%rbx,%r10,1),%ebx
movl 0(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
movl 0(%rsi),%r10d
movl $0xffffffff,%r11d
xorl %edx,%r11d
leal -198630844(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 28(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal 1126891415(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 56(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1416354905(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 20(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -57434055(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 48(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1700485571(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 12(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1894986606(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 40(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1051523(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 4(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -2054922799(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 32(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1873313359(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 60(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -30611744(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 24(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1560198380(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 52(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal 1309151649(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 16(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal -145523070(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 44(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1120210379(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 8(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal 718787259(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 36(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -343485551(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 0(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
addl %r8d,%eax
addl %r9d,%ebx
addl %r14d,%ecx
addl %r15d,%edx
addq $64,%rsi
cmpq %rdi,%rsi
jb L$loop
L$end:
movl %eax,0(%rbp)
movl %ebx,4(%rbp)
movl %ecx,8(%rbp)
movl %edx,12(%rbp)
movq (%rsp),%r15
movq 8(%rsp),%r14
movq 16(%rsp),%r12
movq 24(%rsp),%rbx
movq 32(%rsp),%rbp
addq $40,%rsp
L$epilogue:
.byte 0xf3,0xc3
#endif
#endif // defined(__x86_64__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@ -0,0 +1,167 @@
/* ====================================================================
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <assert.h>
#include <string.h>
#include <CBigNumBoringSSL_type_check.h>
#include "internal.h"
void CRYPTO_cbc128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
block128_f block) {
size_t n;
const uint8_t *iv = ivec;
assert(key != NULL && ivec != NULL);
assert(len == 0 || (in != NULL && out != NULL));
while (len >= 16) {
for (n = 0; n < 16; n += sizeof(size_t)) {
store_word_le(out + n, load_word_le(in + n) ^ load_word_le(iv + n));
}
(*block)(out, out, key);
iv = out;
len -= 16;
in += 16;
out += 16;
}
while (len) {
for (n = 0; n < 16 && n < len; ++n) {
out[n] = in[n] ^ iv[n];
}
for (; n < 16; ++n) {
out[n] = iv[n];
}
(*block)(out, out, key);
iv = out;
if (len <= 16) {
break;
}
len -= 16;
in += 16;
out += 16;
}
OPENSSL_memcpy(ivec, iv, 16);
}
void CRYPTO_cbc128_decrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
block128_f block) {
size_t n;
union {
size_t t[16 / sizeof(size_t)];
uint8_t c[16];
} tmp;
assert(key != NULL && ivec != NULL);
assert(len == 0 || (in != NULL && out != NULL));
const uintptr_t inptr = (uintptr_t) in;
const uintptr_t outptr = (uintptr_t) out;
// If |in| and |out| alias, |in| must be ahead.
assert(inptr >= outptr || inptr + len <= outptr);
if ((inptr >= 32 && outptr <= inptr - 32) || inptr < outptr) {
// If |out| is at least two blocks behind |in| or completely disjoint, there
// is no need to decrypt to a temporary block.
OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
"block cannot be evenly divided into words");
const uint8_t *iv = ivec;
while (len >= 16) {
(*block)(in, out, key);
for (n = 0; n < 16; n += sizeof(size_t)) {
store_word_le(out + n, load_word_le(out + n) ^ load_word_le(iv + n));
}
iv = in;
len -= 16;
in += 16;
out += 16;
}
OPENSSL_memcpy(ivec, iv, 16);
} else {
OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
"block cannot be evenly divided into words");
while (len >= 16) {
(*block)(in, tmp.c, key);
for (n = 0; n < 16; n += sizeof(size_t)) {
size_t c = load_word_le(in + n);
store_word_le(out + n,
tmp.t[n / sizeof(size_t)] ^ load_word_le(ivec + n));
store_word_le(ivec + n, c);
}
len -= 16;
in += 16;
out += 16;
}
}
while (len) {
uint8_t c;
(*block)(in, tmp.c, key);
for (n = 0; n < 16 && n < len; ++n) {
c = in[n];
out[n] = tmp.c[n] ^ ivec[n];
ivec[n] = c;
}
if (len <= 16) {
for (; n < 16; ++n) {
ivec[n] = in[n];
}
break;
}
len -= 16;
in += 16;
out += 16;
}
}
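A minimal standalone sketch of the CBC chaining implemented by the two functions
above, using a toy 16-byte "block cipher" (byte-wise add of a constant) instead
of AES. All names and the toy cipher are illustrative only, not BoringSSL API.
It shows why encryption chains the previous ciphertext block into the next
plaintext block, and why in-place decryption must keep a copy of each ciphertext
block, mirroring the temporary-block path above.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Toy invertible "block cipher": add/subtract 42 to every byte. */
static void toy_encrypt(uint8_t b[16]) { for (int i = 0; i < 16; i++) b[i] += 42; }
static void toy_decrypt(uint8_t b[16]) { for (int i = 0; i < 16; i++) b[i] -= 42; }

/* CBC over whole blocks: c[i] = E(p[i] ^ c[i-1]), with c[-1] = iv. */
static void toy_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len, uint8_t iv[16]) {
  for (size_t off = 0; off < len; off += 16) {
    for (int i = 0; i < 16; i++) out[off + i] = in[off + i] ^ iv[i];
    toy_encrypt(out + off);
    memcpy(iv, out + off, 16);            /* next block chains off this ciphertext */
  }
}

/* p[i] = D(c[i]) ^ c[i-1]; keep a copy of c[i] so in-place decryption still
 * has the previous ciphertext available, like the tmp-block path above. */
static void toy_cbc_decrypt(const uint8_t *in, uint8_t *out, size_t len, uint8_t iv[16]) {
  uint8_t prev[16], cur[16];
  memcpy(prev, iv, 16);
  for (size_t off = 0; off < len; off += 16) {
    memcpy(cur, in + off, 16);
    memcpy(out + off, cur, 16);
    toy_decrypt(out + off);
    for (int i = 0; i < 16; i++) out[off + i] ^= prev[i];
    memcpy(prev, cur, 16);
  }
  memcpy(iv, prev, 16);
}

int main(void) {
  uint8_t iv1[16] = {0}, iv2[16] = {0};
  uint8_t msg[32] = "thirty-two bytes of plaintext..";
  uint8_t buf[32];
  memcpy(buf, msg, 32);
  toy_cbc_encrypt(buf, buf, 32, iv1);     /* in place */
  toy_cbc_decrypt(buf, buf, 32, iv2);     /* in place, relies on the saved copy */
  printf("%s\n", memcmp(buf, msg, 32) == 0 ? "round-trip ok" : "mismatch");
  return 0;
}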

View File

@ -0,0 +1,202 @@
/* ====================================================================
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <CBigNumBoringSSL_type_check.h>
#include <assert.h>
#include <string.h>
#include "internal.h"
OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
"block cannot be divided into size_t");
void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], unsigned *num,
int enc, block128_f block) {
assert(in && out && key && ivec && num);
unsigned n = *num;
if (enc) {
while (n && len) {
*(out++) = ivec[n] ^= *(in++);
--len;
n = (n + 1) % 16;
}
while (len >= 16) {
(*block)(ivec, ivec, key);
for (; n < 16; n += sizeof(size_t)) {
size_t tmp = load_word_le(ivec + n) ^ load_word_le(in + n);
store_word_le(ivec + n, tmp);
store_word_le(out + n, tmp);
}
len -= 16;
out += 16;
in += 16;
n = 0;
}
if (len) {
(*block)(ivec, ivec, key);
while (len--) {
out[n] = ivec[n] ^= in[n];
++n;
}
}
*num = n;
return;
} else {
while (n && len) {
uint8_t c;
*(out++) = ivec[n] ^ (c = *(in++));
ivec[n] = c;
--len;
n = (n + 1) % 16;
}
while (len >= 16) {
(*block)(ivec, ivec, key);
for (; n < 16; n += sizeof(size_t)) {
size_t t = load_word_le(in + n);
store_word_le(out + n, load_word_le(ivec + n) ^ t);
store_word_le(ivec + n, t);
}
len -= 16;
out += 16;
in += 16;
n = 0;
}
if (len) {
(*block)(ivec, ivec, key);
while (len--) {
uint8_t c;
out[n] = ivec[n] ^ (c = in[n]);
ivec[n] = c;
++n;
}
}
*num = n;
return;
}
}
/* This expects a single block of size nbits for both in and out. Note that
it corrupts any extra bits in the last byte of out */
static void cfbr_encrypt_block(const uint8_t *in, uint8_t *out, unsigned nbits,
const AES_KEY *key, uint8_t ivec[16], int enc,
block128_f block) {
int n, rem, num;
uint8_t ovec[16 * 2 + 1]; /* +1 because we dereference (but don't use) one
byte off the end */
if (nbits <= 0 || nbits > 128) {
return;
}
// fill in the first half of the new IV with the current IV
OPENSSL_memcpy(ovec, ivec, 16);
// construct the new IV
(*block)(ivec, ivec, key);
num = (nbits + 7) / 8;
if (enc) {
// encrypt the input
for (n = 0; n < num; ++n) {
out[n] = (ovec[16 + n] = in[n] ^ ivec[n]);
}
} else {
// decrypt the input
for (n = 0; n < num; ++n) {
out[n] = (ovec[16 + n] = in[n]) ^ ivec[n];
}
}
// shift ovec left...
rem = nbits % 8;
num = nbits / 8;
if (rem == 0) {
OPENSSL_memcpy(ivec, ovec + num, 16);
} else {
for (n = 0; n < 16; ++n) {
ivec[n] = ovec[n + num] << rem | ovec[n + num + 1] >> (8 - rem);
}
}
// it is not necessary to cleanse ovec, since the IV is not secret
}
// N.B. This expects the input to be packed, MS bit first
void CRYPTO_cfb128_1_encrypt(const uint8_t *in, uint8_t *out, size_t bits,
const AES_KEY *key, uint8_t ivec[16],
unsigned *num, int enc, block128_f block) {
size_t n;
uint8_t c[1], d[1];
assert(in && out && key && ivec && num);
assert(*num == 0);
for (n = 0; n < bits; ++n) {
c[0] = (in[n / 8] & (1 << (7 - n % 8))) ? 0x80 : 0;
cfbr_encrypt_block(c, d, 1, key, ivec, enc, block);
out[n / 8] = (out[n / 8] & ~(1 << (unsigned int)(7 - n % 8))) |
((d[0] & 0x80) >> (unsigned int)(n % 8));
}
}
void CRYPTO_cfb128_8_encrypt(const unsigned char *in, unsigned char *out,
size_t length, const AES_KEY *key,
unsigned char ivec[16], unsigned *num, int enc,
block128_f block) {
size_t n;
assert(in && out && key && ivec && num);
assert(*num == 0);
for (n = 0; n < length; ++n) {
cfbr_encrypt_block(&in[n], &out[n], 8, key, ivec, enc, block);
}
}
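A minimal standalone sketch of the whole-block CFB feedback used by
CRYPTO_cfb128_encrypt above, with a toy block function standing in for AES
(illustrative names, not BoringSSL API). The point it demonstrates is that both
directions run the block cipher forward over the previous ciphertext:
c[i] = p[i] XOR E(c[i-1]) and p[i] = c[i] XOR E(c[i-1]).

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Toy forward-only "block cipher": rotate-and-XOR each byte. */
static void toy_forward(const uint8_t in[16], uint8_t out[16]) {
  for (int i = 0; i < 16; i++)
    out[i] = (uint8_t)((in[i] << 3) | (in[i] >> 5)) ^ 0xA7;
}

/* Whole-block CFB; enc selects which side of the XOR feeds the shift register. */
static void toy_cfb(const uint8_t *in, uint8_t *out, size_t len,
                    uint8_t ivec[16], int enc) {
  uint8_t ks[16];
  for (size_t off = 0; off < len; off += 16) {
    toy_forward(ivec, ks);                        /* forward direction either way */
    for (int i = 0; i < 16; i++) out[off + i] = in[off + i] ^ ks[i];
    memcpy(ivec, enc ? out + off : in + off, 16); /* ciphertext becomes next IV */
  }
}

int main(void) {
  uint8_t iv1[16] = {1}, iv2[16] = {1};
  uint8_t msg[32] = "exactly thirty-two bytes here..";
  uint8_t ct[32], pt[32];
  toy_cfb(msg, ct, 32, iv1, 1);
  toy_cfb(ct, pt, 32, iv2, 0);
  printf("%s\n", memcmp(pt, msg, 32) == 0 ? "round-trip ok" : "mismatch");
  return 0;
}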

View File

@ -0,0 +1,200 @@
/* ====================================================================
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <CBigNumBoringSSL_type_check.h>
#include <assert.h>
#include <string.h>
#include "internal.h"
// NOTE: the IV/counter CTR mode is big-endian. The code itself
// is endian-neutral.
// increment counter (128-bit int) by 1
static void ctr128_inc(uint8_t *counter) {
uint32_t n = 16, c = 1;
do {
--n;
c += counter[n];
counter[n] = (uint8_t) c;
c >>= 8;
} while (n);
}
OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
"block cannot be divided into size_t");
// The input is encrypted as though 128-bit counter mode is being used. The extra
// state information to record how much of the 128-bit block we have used is
// contained in *num, and the encrypted counter is kept in ecount_buf. Both
// *num and ecount_buf must be initialised with zeros before the first call to
// CRYPTO_ctr128_encrypt().
//
// This algorithm assumes that the counter is in the x lower bits of the IV
// (ivec), and that the application has full control over overflow and the rest
// of the IV. This implementation takes NO responsibility for checking that
// the counter doesn't overflow into the rest of the IV when incremented.
void CRYPTO_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
uint8_t ecount_buf[16], unsigned int *num,
block128_f block) {
unsigned int n;
assert(key && ecount_buf && num);
assert(len == 0 || (in && out));
assert(*num < 16);
n = *num;
while (n && len) {
*(out++) = *(in++) ^ ecount_buf[n];
--len;
n = (n + 1) % 16;
}
while (len >= 16) {
(*block)(ivec, ecount_buf, key);
ctr128_inc(ivec);
for (n = 0; n < 16; n += sizeof(size_t)) {
store_word_le(out + n,
load_word_le(in + n) ^ load_word_le(ecount_buf + n));
}
len -= 16;
out += 16;
in += 16;
n = 0;
}
if (len) {
(*block)(ivec, ecount_buf, key);
ctr128_inc(ivec);
while (len--) {
out[n] = in[n] ^ ecount_buf[n];
++n;
}
}
*num = n;
}
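A standalone sketch of the resumable-state contract described in the comment
above CRYPTO_ctr128_encrypt: the partial-block position (*num) and the
encrypted-counter buffer start zeroed, carry over between calls, and make a
stream split across calls come out identical to a single call. The toy block
function and names are illustrative only, not BoringSSL's API.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Big-endian 128-bit increment, same carry behaviour as ctr128_inc above. */
static void inc128(uint8_t c[16]) {
  for (int i = 15; i >= 0; i--) {
    if (++c[i] != 0) break;              /* stop once there is no carry */
  }
}

/* Toy "block cipher": keystream block = counter bytes XOR 0x5A. */
static void toy_block(const uint8_t in[16], uint8_t out[16]) {
  for (int i = 0; i < 16; i++) out[i] = in[i] ^ 0x5A;
}

/* Resumable CTR: *num and ecount must start zeroed, as the comment above says,
 * and are carried between calls so a split stream continues seamlessly. */
static void toy_ctr(const uint8_t *in, uint8_t *out, size_t len,
                    uint8_t ivec[16], uint8_t ecount[16], unsigned *num) {
  unsigned n = *num;
  for (size_t i = 0; i < len; i++) {
    if (n == 0) { toy_block(ivec, ecount); inc128(ivec); }
    out[i] = in[i] ^ ecount[n];
    n = (n + 1) % 16;
  }
  *num = n;
}

int main(void) {
  uint8_t msg[40] = "forty bytes of text, split across calls";
  uint8_t one[40], two[40];
  uint8_t iv1[16] = {0}, iv2[16] = {0}, ec1[16] = {0}, ec2[16] = {0};
  unsigned n1 = 0, n2 = 0;
  toy_ctr(msg, one, 40, iv1, ec1, &n1);          /* single call */
  toy_ctr(msg, two, 23, iv2, ec2, &n2);          /* same stream in two calls */
  toy_ctr(msg + 23, two + 23, 17, iv2, ec2, &n2);
  printf("%s\n", memcmp(one, two, 40) == 0 ? "streams match" : "mismatch");
  return 0;
}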
// increment upper 96 bits of 128-bit counter by 1
static void ctr96_inc(uint8_t *counter) {
uint32_t n = 12, c = 1;
do {
--n;
c += counter[n];
counter[n] = (uint8_t) c;
c >>= 8;
} while (n);
}
void CRYPTO_ctr128_encrypt_ctr32(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
uint8_t ecount_buf[16], unsigned int *num,
ctr128_f func) {
unsigned int n, ctr32;
assert(key && ecount_buf && num);
assert(len == 0 || (in && out));
assert(*num < 16);
n = *num;
while (n && len) {
*(out++) = *(in++) ^ ecount_buf[n];
--len;
n = (n + 1) % 16;
}
ctr32 = GETU32(ivec + 12);
while (len >= 16) {
size_t blocks = len / 16;
// 1<<28 is just a not-so-small yet not-so-large number...
// The condition below is practically never met, but it has to
// be checked for code correctness.
if (sizeof(size_t) > sizeof(unsigned int) && blocks > (1U << 28)) {
blocks = (1U << 28);
}
// As (*func) operates on a 32-bit counter, the caller
// has to handle overflow. The 'if' below detects the
// overflow, which is then handled by limiting the
// number of blocks to the exact overflow point...
ctr32 += (uint32_t)blocks;
if (ctr32 < blocks) {
blocks -= ctr32;
ctr32 = 0;
}
(*func)(in, out, blocks, key, ivec);
// (*func) does not update ivec, caller does:
PUTU32(ivec + 12, ctr32);
// ... overflow was detected, propagate the carry.
if (ctr32 == 0) {
ctr96_inc(ivec);
}
blocks *= 16;
len -= blocks;
out += blocks;
in += blocks;
}
if (len) {
OPENSSL_memset(ecount_buf, 0, 16);
(*func)(ecount_buf, ecount_buf, 1, key, ivec);
++ctr32;
PUTU32(ivec + 12, ctr32);
if (ctr32 == 0) {
ctr96_inc(ivec);
}
while (len--) {
out[n] = in[n] ^ ecount_buf[n];
++n;
}
}
*num = n;
}
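A worked example of the 32-bit counter overflow split in
CRYPTO_ctr128_encrypt_ctr32 above, using made-up numbers. It reproduces the
arithmetic of the 'ctr32 += blocks; if (ctr32 < blocks)' sequence to show how
many blocks get handed to (*func) before the wrap and the ctr96_inc carry.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

int main(void) {
  uint32_t ctr32 = 0xfffffffd;   /* 3 counter values left before the wrap */
  size_t blocks = 10;            /* caller wants 10 blocks */
  ctr32 += (uint32_t)blocks;     /* wraps around to 7 */
  if (ctr32 < blocks) {          /* wrap detected: 7 < 10 */
    blocks -= ctr32;             /* process only 10 - 7 = 3 blocks now */
    ctr32 = 0;
  }
  /* (*func) would now get 3 blocks (counters 0xfffffffd..0xffffffff); since
   * ctr32 == 0 afterwards, ctr96_inc carries into the upper 96 bits and the
   * remaining 7 blocks are handled on the next loop iteration. */
  printf("process %zu blocks, then carry into the upper 96 bits\n", blocks);
  return 0;
}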

View File

@ -0,0 +1,729 @@
/* ====================================================================
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <CBigNumBoringSSL_base.h>
#include <assert.h>
#include <string.h>
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_cpu.h>
#include "internal.h"
#include "../../internal.h"
// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
// bits of a |size_t|.
static const size_t kSizeTWithoutLower4Bits = (size_t) -16;
#define GCM_MUL(ctx, Xi) gcm_gmult_nohw((ctx)->Xi.u, (ctx)->gcm_key.Htable)
#define GHASH(ctx, in, len) \
gcm_ghash_nohw((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len)
// GHASH_CHUNK is "stride parameter" missioned to mitigate cache
// trashing effect. In other words idea is to hash data while it's
// still in L1 cache after encryption pass...
#define GHASH_CHUNK (3 * 1024)
#if defined(GHASH_ASM_X86_64) || defined(GHASH_ASM_X86)
static inline void gcm_reduce_1bit(u128 *V) {
if (sizeof(size_t) == 8) {
uint64_t T = UINT64_C(0xe100000000000000) & (0 - (V->hi & 1));
V->hi = (V->lo << 63) | (V->hi >> 1);
V->lo = (V->lo >> 1) ^ T;
} else {
uint32_t T = 0xe1000000U & (0 - (uint32_t)(V->hi & 1));
V->hi = (V->lo << 63) | (V->hi >> 1);
V->lo = (V->lo >> 1) ^ ((uint64_t)T << 32);
}
}
void gcm_init_ssse3(u128 Htable[16], const uint64_t H[2]) {
Htable[0].hi = 0;
Htable[0].lo = 0;
u128 V;
V.hi = H[1];
V.lo = H[0];
Htable[8] = V;
gcm_reduce_1bit(&V);
Htable[4] = V;
gcm_reduce_1bit(&V);
Htable[2] = V;
gcm_reduce_1bit(&V);
Htable[1] = V;
Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
V = Htable[4];
Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
V = Htable[8];
Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
// Treat |Htable| as a 16x16 byte table and transpose it. Thus, Htable[i]
// contains the i'th byte of j*H for all j.
uint8_t *Hbytes = (uint8_t *)Htable;
for (int i = 0; i < 16; i++) {
for (int j = 0; j < i; j++) {
uint8_t tmp = Hbytes[16*i + j];
Hbytes[16*i + j] = Hbytes[16*j + i];
Hbytes[16*j + i] = tmp;
}
}
}
#endif // GHASH_ASM_X86_64 || GHASH_ASM_X86
#ifdef GCM_FUNCREF
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable)
#undef GHASH
#define GHASH(ctx, in, len) \
(*gcm_ghash_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len)
#endif // GCM_FUNCREF
void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
u128 *out_key, u128 out_table[16], int *out_is_avx,
const uint8_t gcm_key[16]) {
*out_is_avx = 0;
union {
uint64_t u[2];
uint8_t c[16];
} H;
OPENSSL_memcpy(H.c, gcm_key, 16);
// H is stored in host byte order
H.u[0] = CRYPTO_bswap8(H.u[0]);
H.u[1] = CRYPTO_bswap8(H.u[1]);
OPENSSL_memcpy(out_key, H.c, 16);
#if defined(GHASH_ASM_X86_64)
if (crypto_gcm_clmul_enabled()) {
if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) { // AVX+MOVBE
gcm_init_avx(out_table, H.u);
*out_mult = gcm_gmult_avx;
*out_hash = gcm_ghash_avx;
*out_is_avx = 1;
return;
}
gcm_init_clmul(out_table, H.u);
*out_mult = gcm_gmult_clmul;
*out_hash = gcm_ghash_clmul;
return;
}
if (gcm_ssse3_capable()) {
gcm_init_ssse3(out_table, H.u);
*out_mult = gcm_gmult_ssse3;
*out_hash = gcm_ghash_ssse3;
return;
}
#elif defined(GHASH_ASM_X86)
if (crypto_gcm_clmul_enabled()) {
gcm_init_clmul(out_table, H.u);
*out_mult = gcm_gmult_clmul;
*out_hash = gcm_ghash_clmul;
return;
}
if (gcm_ssse3_capable()) {
gcm_init_ssse3(out_table, H.u);
*out_mult = gcm_gmult_ssse3;
*out_hash = gcm_ghash_ssse3;
return;
}
#elif defined(GHASH_ASM_ARM)
if (gcm_pmull_capable()) {
gcm_init_v8(out_table, H.u);
*out_mult = gcm_gmult_v8;
*out_hash = gcm_ghash_v8;
return;
}
if (gcm_neon_capable()) {
gcm_init_neon(out_table, H.u);
*out_mult = gcm_gmult_neon;
*out_hash = gcm_ghash_neon;
return;
}
#elif defined(GHASH_ASM_PPC64LE)
if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
gcm_init_p8(out_table, H.u);
*out_mult = gcm_gmult_p8;
*out_hash = gcm_ghash_p8;
return;
}
#endif
gcm_init_nohw(out_table, H.u);
*out_mult = gcm_gmult_nohw;
*out_hash = gcm_ghash_nohw;
}
void CRYPTO_gcm128_init_key(GCM128_KEY *gcm_key, const AES_KEY *aes_key,
block128_f block, int block_is_hwaes) {
OPENSSL_memset(gcm_key, 0, sizeof(*gcm_key));
gcm_key->block = block;
uint8_t ghash_key[16];
OPENSSL_memset(ghash_key, 0, sizeof(ghash_key));
(*block)(ghash_key, ghash_key, aes_key);
int is_avx;
CRYPTO_ghash_init(&gcm_key->gmult, &gcm_key->ghash, &gcm_key->H,
gcm_key->Htable, &is_avx, ghash_key);
gcm_key->use_aesni_gcm_crypt = (is_avx && block_is_hwaes) ? 1 : 0;
}
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const AES_KEY *key,
const uint8_t *iv, size_t len) {
#ifdef GCM_FUNCREF
void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
ctx->gcm_key.gmult;
#endif
ctx->Yi.u[0] = 0;
ctx->Yi.u[1] = 0;
ctx->Xi.u[0] = 0;
ctx->Xi.u[1] = 0;
ctx->len.u[0] = 0; // AAD length
ctx->len.u[1] = 0; // message length
ctx->ares = 0;
ctx->mres = 0;
uint32_t ctr;
if (len == 12) {
OPENSSL_memcpy(ctx->Yi.c, iv, 12);
ctx->Yi.c[15] = 1;
ctr = 1;
} else {
uint64_t len0 = len;
while (len >= 16) {
for (size_t i = 0; i < 16; ++i) {
ctx->Yi.c[i] ^= iv[i];
}
GCM_MUL(ctx, Yi);
iv += 16;
len -= 16;
}
if (len) {
for (size_t i = 0; i < len; ++i) {
ctx->Yi.c[i] ^= iv[i];
}
GCM_MUL(ctx, Yi);
}
len0 <<= 3;
ctx->Yi.u[1] ^= CRYPTO_bswap8(len0);
GCM_MUL(ctx, Yi);
ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
}
(*ctx->gcm_key.block)(ctx->Yi.c, ctx->EK0.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
}
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
#ifdef GCM_FUNCREF
void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
ctx->gcm_key.gmult;
void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len) = ctx->gcm_key.ghash;
#endif
if (ctx->len.u[1]) {
return 0;
}
uint64_t alen = ctx->len.u[0] + len;
if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
return 0;
}
ctx->len.u[0] = alen;
unsigned n = ctx->ares;
if (n) {
while (n && len) {
ctx->Xi.c[n] ^= *(aad++);
--len;
n = (n + 1) % 16;
}
if (n == 0) {
GCM_MUL(ctx, Xi);
} else {
ctx->ares = n;
return 1;
}
}
// Process a whole number of blocks.
size_t len_blocks = len & kSizeTWithoutLower4Bits;
if (len_blocks != 0) {
GHASH(ctx, aad, len_blocks);
aad += len_blocks;
len -= len_blocks;
}
// Process the remainder.
if (len != 0) {
n = (unsigned int)len;
for (size_t i = 0; i < len; ++i) {
ctx->Xi.c[i] ^= aad[i];
}
}
ctx->ares = n;
return 1;
}
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
const uint8_t *in, uint8_t *out, size_t len) {
block128_f block = ctx->gcm_key.block;
#ifdef GCM_FUNCREF
void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
ctx->gcm_key.gmult;
void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len) = ctx->gcm_key.ghash;
#endif
uint64_t mlen = ctx->len.u[1] + len;
if (mlen > ((UINT64_C(1) << 36) - 32) ||
(sizeof(len) == 8 && mlen < len)) {
return 0;
}
ctx->len.u[1] = mlen;
if (ctx->ares) {
// First call to encrypt finalizes GHASH(AAD)
GCM_MUL(ctx, Xi);
ctx->ares = 0;
}
unsigned n = ctx->mres;
if (n) {
while (n && len) {
ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
--len;
n = (n + 1) % 16;
}
if (n == 0) {
GCM_MUL(ctx, Xi);
} else {
ctx->mres = n;
return 1;
}
}
uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
while (len >= GHASH_CHUNK) {
size_t j = GHASH_CHUNK;
while (j) {
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
for (size_t i = 0; i < 16; i += sizeof(size_t)) {
store_word_le(out + i,
load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
}
out += 16;
in += 16;
j -= 16;
}
GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
len -= GHASH_CHUNK;
}
size_t len_blocks = len & kSizeTWithoutLower4Bits;
if (len_blocks != 0) {
while (len >= 16) {
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
for (size_t i = 0; i < 16; i += sizeof(size_t)) {
store_word_le(out + i,
load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
}
out += 16;
in += 16;
len -= 16;
}
GHASH(ctx, out - len_blocks, len_blocks);
}
if (len) {
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
while (len--) {
ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
++n;
}
}
ctx->mres = n;
return 1;
}
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
const unsigned char *in, unsigned char *out,
size_t len) {
block128_f block = ctx->gcm_key.block;
#ifdef GCM_FUNCREF
void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
ctx->gcm_key.gmult;
void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len) = ctx->gcm_key.ghash;
#endif
uint64_t mlen = ctx->len.u[1] + len;
if (mlen > ((UINT64_C(1) << 36) - 32) ||
(sizeof(len) == 8 && mlen < len)) {
return 0;
}
ctx->len.u[1] = mlen;
if (ctx->ares) {
// First call to decrypt finalizes GHASH(AAD)
GCM_MUL(ctx, Xi);
ctx->ares = 0;
}
unsigned n = ctx->mres;
if (n) {
while (n && len) {
uint8_t c = *(in++);
*(out++) = c ^ ctx->EKi.c[n];
ctx->Xi.c[n] ^= c;
--len;
n = (n + 1) % 16;
}
if (n == 0) {
GCM_MUL(ctx, Xi);
} else {
ctx->mres = n;
return 1;
}
}
uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
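// Bulk path for decryption: GHASH is computed over the ciphertext, so each
// chunk is hashed before it is decrypted in counter mode.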
while (len >= GHASH_CHUNK) {
size_t j = GHASH_CHUNK;
GHASH(ctx, in, GHASH_CHUNK);
while (j) {
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
for (size_t i = 0; i < 16; i += sizeof(size_t)) {
store_word_le(out + i,
load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
}
out += 16;
in += 16;
j -= 16;
}
len -= GHASH_CHUNK;
}
size_t len_blocks = len & kSizeTWithoutLower4Bits;
if (len_blocks != 0) {
GHASH(ctx, in, len_blocks);
while (len >= 16) {
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
for (size_t i = 0; i < 16; i += sizeof(size_t)) {
store_word_le(out + i,
load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
}
out += 16;
in += 16;
len -= 16;
}
}
if (len) {
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
while (len--) {
uint8_t c = in[n];
ctx->Xi.c[n] ^= c;
out[n] = c ^ ctx->EKi.c[n];
++n;
}
}
ctx->mres = n;
return 1;
}
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const AES_KEY *key,
const uint8_t *in, uint8_t *out, size_t len,
ctr128_f stream) {
#ifdef GCM_FUNCREF
void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
ctx->gcm_key.gmult;
void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len) = ctx->gcm_key.ghash;
#endif
uint64_t mlen = ctx->len.u[1] + len;
if (mlen > ((UINT64_C(1) << 36) - 32) ||
(sizeof(len) == 8 && mlen < len)) {
return 0;
}
ctx->len.u[1] = mlen;
if (ctx->ares) {
// First call to encrypt finalizes GHASH(AAD)
GCM_MUL(ctx, Xi);
ctx->ares = 0;
}
unsigned n = ctx->mres;
if (n) {
while (n && len) {
ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
--len;
n = (n + 1) % 16;
}
if (n == 0) {
GCM_MUL(ctx, Xi);
} else {
ctx->mres = n;
return 1;
}
}
#if defined(AESNI_GCM)
// Check |len| to work around a C language bug. See https://crbug.com/1019588.
if (ctx->gcm_key.use_aesni_gcm_crypt && len > 0) {
// |aesni_gcm_encrypt| may not process all the input given to it. It may
// not process *any* of its input if it is deemed too small.
size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
in += bulk;
out += bulk;
len -= bulk;
}
#endif
uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
while (len >= GHASH_CHUNK) {
(*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
ctr += GHASH_CHUNK / 16;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
GHASH(ctx, out, GHASH_CHUNK);
out += GHASH_CHUNK;
in += GHASH_CHUNK;
len -= GHASH_CHUNK;
}
size_t len_blocks = len & kSizeTWithoutLower4Bits;
if (len_blocks != 0) {
size_t j = len_blocks / 16;
(*stream)(in, out, j, key, ctx->Yi.c);
ctr += (unsigned int)j;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
in += len_blocks;
len -= len_blocks;
GHASH(ctx, out, len_blocks);
out += len_blocks;
}
if (len) {
(*ctx->gcm_key.block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
while (len--) {
ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
++n;
}
}
ctx->mres = n;
return 1;
}
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const AES_KEY *key,
const uint8_t *in, uint8_t *out, size_t len,
ctr128_f stream) {
#ifdef GCM_FUNCREF
void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
ctx->gcm_key.gmult;
void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len) = ctx->gcm_key.ghash;
#endif
uint64_t mlen = ctx->len.u[1] + len;
if (mlen > ((UINT64_C(1) << 36) - 32) ||
(sizeof(len) == 8 && mlen < len)) {
return 0;
}
ctx->len.u[1] = mlen;
if (ctx->ares) {
// First call to decrypt finalizes GHASH(AAD)
GCM_MUL(ctx, Xi);
ctx->ares = 0;
}
unsigned n = ctx->mres;
if (n) {
while (n && len) {
uint8_t c = *(in++);
*(out++) = c ^ ctx->EKi.c[n];
ctx->Xi.c[n] ^= c;
--len;
n = (n + 1) % 16;
}
if (n == 0) {
GCM_MUL(ctx, Xi);
} else {
ctx->mres = n;
return 1;
}
}
#if defined(AESNI_GCM)
// Check |len| to work around a C language bug. See https://crbug.com/1019588.
if (ctx->gcm_key.use_aesni_gcm_crypt && len > 0) {
// |aesni_gcm_decrypt| may not process all the input given to it. It may
// not process *any* of its input if it is deemed too small.
size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
in += bulk;
out += bulk;
len -= bulk;
}
#endif
uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
while (len >= GHASH_CHUNK) {
GHASH(ctx, in, GHASH_CHUNK);
(*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
ctr += GHASH_CHUNK / 16;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
out += GHASH_CHUNK;
in += GHASH_CHUNK;
len -= GHASH_CHUNK;
}
size_t len_blocks = len & kSizeTWithoutLower4Bits;
if (len_blocks != 0) {
size_t j = len_blocks / 16;
GHASH(ctx, in, len_blocks);
(*stream)(in, out, j, key, ctx->Yi.c);
ctr += (unsigned int)j;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
out += len_blocks;
in += len_blocks;
len -= len_blocks;
}
if (len) {
(*ctx->gcm_key.block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
while (len--) {
uint8_t c = in[n];
ctx->Xi.c[n] ^= c;
out[n] = c ^ ctx->EKi.c[n];
++n;
}
}
ctx->mres = n;
return 1;
}
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
#ifdef GCM_FUNCREF
void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
ctx->gcm_key.gmult;
#endif
if (ctx->mres || ctx->ares) {
GCM_MUL(ctx, Xi);
}
ctx->Xi.u[0] ^= CRYPTO_bswap8(ctx->len.u[0] << 3);
ctx->Xi.u[1] ^= CRYPTO_bswap8(ctx->len.u[1] << 3);
GCM_MUL(ctx, Xi);
ctx->Xi.u[0] ^= ctx->EK0.u[0];
ctx->Xi.u[1] ^= ctx->EK0.u[1];
if (tag && len <= sizeof(ctx->Xi)) {
return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
} else {
return 0;
}
}
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
CRYPTO_gcm128_finish(ctx, NULL, 0);
OPENSSL_memcpy(tag, ctx->Xi.c,
len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#if defined(GHASH_ASM_X86) || defined(GHASH_ASM_X86_64)
const uint32_t *ia32cap = OPENSSL_ia32cap_get();
return (ia32cap[0] & (1 << 24)) && // check FXSR bit
(ia32cap[1] & (1 << 1)); // check PCLMULQDQ bit
#else
return 0;
#endif
}
#endif
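The routines above compose into a complete GCM seal (encrypt-and-tag) operation. The sketch below is illustrative only and is not part of the vendored BoringSSL sources: the helper name example_gcm_seal is hypothetical, and it assumes the vendored AES_set_encrypt_key/AES_encrypt from CBigNumBoringSSL_aes.h as the generic block128_f implementation.

#include <CBigNumBoringSSL_aes.h>
#include "internal.h"
// Illustrative sketch: AES-128-GCM seal of |pt_len| bytes with a 12-byte IV,
// producing ciphertext in |ct| and a 16-byte tag. Returns 1 on success.
static int example_gcm_seal(const uint8_t key_bytes[16], const uint8_t iv[12],
                            const uint8_t *aad, size_t aad_len,
                            const uint8_t *pt, uint8_t *ct, size_t pt_len,
                            uint8_t tag[16]) {
  AES_KEY aes;
  if (AES_set_encrypt_key(key_bytes, 128, &aes) != 0) {
    return 0;
  }
  GCM128_CONTEXT gcm;
  OPENSSL_memset(&gcm, 0, sizeof(gcm));
  CRYPTO_gcm128_init_key(&gcm.gcm_key, &aes, AES_encrypt, /*block_is_hwaes=*/0);
  CRYPTO_gcm128_setiv(&gcm, &aes, iv, 12);
  if (!CRYPTO_gcm128_aad(&gcm, aad, aad_len) ||
      !CRYPTO_gcm128_encrypt(&gcm, &aes, pt, ct, pt_len)) {
    return 0;
  }
  CRYPTO_gcm128_tag(&gcm, tag, 16);
  return 1;
}

The matching open operation would call CRYPTO_gcm128_decrypt and then verify the tag with CRYPTO_gcm128_finish rather than copying it out.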

View File

@@ -0,0 +1,304 @@
/* Copyright (c) 2019, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_base.h>
#include "../../internal.h"
#include "internal.h"
#if !defined(BORINGSSL_HAS_UINT128) && defined(OPENSSL_SSE2)
#include <emmintrin.h>
#endif
// This file contains a constant-time implementation of GHASH based on the notes
// in https://bearssl.org/constanttime.html#ghash-for-gcm and the reduction
// algorithm described in
// https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf.
//
// Unlike the BearSSL notes, we use uint128_t in the 64-bit implementation. Our
// primary compilers (clang, clang-cl, and gcc) all support it. MSVC will run
// the 32-bit implementation, but we can use its intrinsics if necessary.
#if defined(BORINGSSL_HAS_UINT128)
static void gcm_mul64_nohw(uint64_t *out_lo, uint64_t *out_hi, uint64_t a,
uint64_t b) {
// One term every four bits means the largest term is 64/4 = 16, which barely
// overflows into the next term. Using one term every five bits would cost 25
// multiplications instead of 16. It is faster to mask off the bottom four
// bits of |a|, giving a largest term of 60/4 = 15, and apply the bottom bits
// separately.
uint64_t a0 = a & UINT64_C(0x1111111111111110);
uint64_t a1 = a & UINT64_C(0x2222222222222220);
uint64_t a2 = a & UINT64_C(0x4444444444444440);
uint64_t a3 = a & UINT64_C(0x8888888888888880);
uint64_t b0 = b & UINT64_C(0x1111111111111111);
uint64_t b1 = b & UINT64_C(0x2222222222222222);
uint64_t b2 = b & UINT64_C(0x4444444444444444);
uint64_t b3 = b & UINT64_C(0x8888888888888888);
uint128_t c0 = (a0 * (uint128_t)b0) ^ (a1 * (uint128_t)b3) ^
(a2 * (uint128_t)b2) ^ (a3 * (uint128_t)b1);
uint128_t c1 = (a0 * (uint128_t)b1) ^ (a1 * (uint128_t)b0) ^
(a2 * (uint128_t)b3) ^ (a3 * (uint128_t)b2);
uint128_t c2 = (a0 * (uint128_t)b2) ^ (a1 * (uint128_t)b1) ^
(a2 * (uint128_t)b0) ^ (a3 * (uint128_t)b3);
uint128_t c3 = (a0 * (uint128_t)b3) ^ (a1 * (uint128_t)b2) ^
(a2 * (uint128_t)b1) ^ (a3 * (uint128_t)b0);
// Multiply the bottom four bits of |a| with |b|.
uint64_t a0_mask = UINT64_C(0) - (a & 1);
uint64_t a1_mask = UINT64_C(0) - ((a >> 1) & 1);
uint64_t a2_mask = UINT64_C(0) - ((a >> 2) & 1);
uint64_t a3_mask = UINT64_C(0) - ((a >> 3) & 1);
uint128_t extra = (a0_mask & b) ^ ((uint128_t)(a1_mask & b) << 1) ^
((uint128_t)(a2_mask & b) << 2) ^
((uint128_t)(a3_mask & b) << 3);
*out_lo = (((uint64_t)c0) & UINT64_C(0x1111111111111111)) ^
(((uint64_t)c1) & UINT64_C(0x2222222222222222)) ^
(((uint64_t)c2) & UINT64_C(0x4444444444444444)) ^
(((uint64_t)c3) & UINT64_C(0x8888888888888888)) ^ ((uint64_t)extra);
*out_hi = (((uint64_t)(c0 >> 64)) & UINT64_C(0x1111111111111111)) ^
(((uint64_t)(c1 >> 64)) & UINT64_C(0x2222222222222222)) ^
(((uint64_t)(c2 >> 64)) & UINT64_C(0x4444444444444444)) ^
(((uint64_t)(c3 >> 64)) & UINT64_C(0x8888888888888888)) ^
((uint64_t)(extra >> 64));
}
#elif defined(OPENSSL_SSE2)
static __m128i gcm_mul32_nohw(uint32_t a, uint32_t b) {
// One term every four bits means the largest term is 32/4 = 8, which does not
// overflow into the next term.
__m128i aa = _mm_setr_epi32(a, 0, a, 0);
__m128i bb = _mm_setr_epi32(b, 0, b, 0);
__m128i a0a0 =
_mm_and_si128(aa, _mm_setr_epi32(0x11111111, 0, 0x11111111, 0));
__m128i a2a2 =
_mm_and_si128(aa, _mm_setr_epi32(0x44444444, 0, 0x44444444, 0));
__m128i b0b1 =
_mm_and_si128(bb, _mm_setr_epi32(0x11111111, 0, 0x22222222, 0));
__m128i b2b3 =
_mm_and_si128(bb, _mm_setr_epi32(0x44444444, 0, 0x88888888, 0));
__m128i c0c1 =
_mm_xor_si128(_mm_mul_epu32(a0a0, b0b1), _mm_mul_epu32(a2a2, b2b3));
__m128i c2c3 =
_mm_xor_si128(_mm_mul_epu32(a2a2, b0b1), _mm_mul_epu32(a0a0, b2b3));
__m128i a1a1 =
_mm_and_si128(aa, _mm_setr_epi32(0x22222222, 0, 0x22222222, 0));
__m128i a3a3 =
_mm_and_si128(aa, _mm_setr_epi32(0x88888888, 0, 0x88888888, 0));
__m128i b3b0 =
_mm_and_si128(bb, _mm_setr_epi32(0x88888888, 0, 0x11111111, 0));
__m128i b1b2 =
_mm_and_si128(bb, _mm_setr_epi32(0x22222222, 0, 0x44444444, 0));
c0c1 = _mm_xor_si128(c0c1, _mm_mul_epu32(a1a1, b3b0));
c0c1 = _mm_xor_si128(c0c1, _mm_mul_epu32(a3a3, b1b2));
c2c3 = _mm_xor_si128(c2c3, _mm_mul_epu32(a3a3, b3b0));
c2c3 = _mm_xor_si128(c2c3, _mm_mul_epu32(a1a1, b1b2));
c0c1 = _mm_and_si128(
c0c1, _mm_setr_epi32(0x11111111, 0x11111111, 0x22222222, 0x22222222));
c2c3 = _mm_and_si128(
c2c3, _mm_setr_epi32(0x44444444, 0x44444444, 0x88888888, 0x88888888));
c0c1 = _mm_xor_si128(c0c1, c2c3);
// c0 ^= c1
c0c1 = _mm_xor_si128(c0c1, _mm_srli_si128(c0c1, 8));
return c0c1;
}
static void gcm_mul64_nohw(uint64_t *out_lo, uint64_t *out_hi, uint64_t a,
uint64_t b) {
uint32_t a0 = a & 0xffffffff;
uint32_t a1 = a >> 32;
uint32_t b0 = b & 0xffffffff;
uint32_t b1 = b >> 32;
// Karatsuba multiplication.
__m128i lo = gcm_mul32_nohw(a0, b0);
__m128i hi = gcm_mul32_nohw(a1, b1);
__m128i mid = gcm_mul32_nohw(a0 ^ a1, b0 ^ b1);
mid = _mm_xor_si128(mid, lo);
mid = _mm_xor_si128(mid, hi);
__m128i ret = _mm_unpacklo_epi64(lo, hi);
mid = _mm_slli_si128(mid, 4);
mid = _mm_and_si128(mid, _mm_setr_epi32(0, 0xffffffff, 0xffffffff, 0));
ret = _mm_xor_si128(ret, mid);
memcpy(out_lo, &ret, 8);
memcpy(out_hi, ((char*)&ret) + 8, 8);
}
#else // !BORINGSSL_HAS_UINT128 && !OPENSSL_SSE2
static uint64_t gcm_mul32_nohw(uint32_t a, uint32_t b) {
// One term every four bits means the largest term is 32/4 = 8, which does not
// overflow into the next term.
uint32_t a0 = a & 0x11111111;
uint32_t a1 = a & 0x22222222;
uint32_t a2 = a & 0x44444444;
uint32_t a3 = a & 0x88888888;
uint32_t b0 = b & 0x11111111;
uint32_t b1 = b & 0x22222222;
uint32_t b2 = b & 0x44444444;
uint32_t b3 = b & 0x88888888;
uint64_t c0 = (a0 * (uint64_t)b0) ^ (a1 * (uint64_t)b3) ^
(a2 * (uint64_t)b2) ^ (a3 * (uint64_t)b1);
uint64_t c1 = (a0 * (uint64_t)b1) ^ (a1 * (uint64_t)b0) ^
(a2 * (uint64_t)b3) ^ (a3 * (uint64_t)b2);
uint64_t c2 = (a0 * (uint64_t)b2) ^ (a1 * (uint64_t)b1) ^
(a2 * (uint64_t)b0) ^ (a3 * (uint64_t)b3);
uint64_t c3 = (a0 * (uint64_t)b3) ^ (a1 * (uint64_t)b2) ^
(a2 * (uint64_t)b1) ^ (a3 * (uint64_t)b0);
return (c0 & UINT64_C(0x1111111111111111)) |
(c1 & UINT64_C(0x2222222222222222)) |
(c2 & UINT64_C(0x4444444444444444)) |
(c3 & UINT64_C(0x8888888888888888));
}
static void gcm_mul64_nohw(uint64_t *out_lo, uint64_t *out_hi, uint64_t a,
uint64_t b) {
uint32_t a0 = a & 0xffffffff;
uint32_t a1 = a >> 32;
uint32_t b0 = b & 0xffffffff;
uint32_t b1 = b >> 32;
// Karatsuba multiplication.
uint64_t lo = gcm_mul32_nohw(a0, b0);
uint64_t hi = gcm_mul32_nohw(a1, b1);
uint64_t mid = gcm_mul32_nohw(a0 ^ a1, b0 ^ b1) ^ lo ^ hi;
*out_lo = lo ^ (mid << 32);
*out_hi = hi ^ (mid >> 32);
}
#endif // BORINGSSL_HAS_UINT128
void gcm_init_nohw(u128 Htable[16], const uint64_t Xi[2]) {
// We implement GHASH in terms of POLYVAL, as described in RFC8452. This
// avoids a shift by 1 in the multiplication, needed to account for bit
// reversal losing a bit after multiplication, that is,
// rev128(X) * rev128(Y) = rev255(X*Y).
//
// Per Appendix A, we run mulX_POLYVAL. Note this is the same transformation
// applied by |gcm_init_clmul|, etc. Note |Xi| has already been byteswapped.
//
// See also slide 16 of
// https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf
Htable[0].lo = Xi[1];
Htable[0].hi = Xi[0];
uint64_t carry = Htable[0].hi >> 63;
carry = 0u - carry;
Htable[0].hi <<= 1;
Htable[0].hi |= Htable[0].lo >> 63;
Htable[0].lo <<= 1;
// The irreducible polynomial is 1 + x^121 + x^126 + x^127 + x^128, so we
// conditionally add 0xc200...0001.
Htable[0].lo ^= carry & 1;
Htable[0].hi ^= carry & UINT64_C(0xc200000000000000);
// This implementation does not use the rest of |Htable|.
}
static void gcm_polyval_nohw(uint64_t Xi[2], const u128 *H) {
// Karatsuba multiplication. The product of |Xi| and |H| is stored in |r0|
// through |r3|. Note there is no byte or bit reversal because we are
// evaluating POLYVAL.
uint64_t r0, r1;
gcm_mul64_nohw(&r0, &r1, Xi[0], H->lo);
uint64_t r2, r3;
gcm_mul64_nohw(&r2, &r3, Xi[1], H->hi);
uint64_t mid0, mid1;
gcm_mul64_nohw(&mid0, &mid1, Xi[0] ^ Xi[1], H->hi ^ H->lo);
mid0 ^= r0 ^ r2;
mid1 ^= r1 ^ r3;
r2 ^= mid1;
r1 ^= mid0;
// Now we multiply our 256-bit result by x^-128 and reduce. |r2| and
// |r3| shift into position and we must multiply |r0| and |r1| by x^-128. We
// have:
//
// 1 = x^121 + x^126 + x^127 + x^128
// x^-128 = x^-7 + x^-2 + x^-1 + 1
//
// This is the GHASH reduction step, but with bits flowing in reverse.
// The x^-7, x^-2, and x^-1 terms shift bits past x^0, which would require
// another reduction step. Instead, we gather the excess bits, incorporate
// them into |r0| and |r1| and reduce once. See slides 17-19
// of https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf.
r1 ^= (r0 << 63) ^ (r0 << 62) ^ (r0 << 57);
// 1
r2 ^= r0;
r3 ^= r1;
// x^-1
r2 ^= r0 >> 1;
r2 ^= r1 << 63;
r3 ^= r1 >> 1;
// x^-2
r2 ^= r0 >> 2;
r2 ^= r1 << 62;
r3 ^= r1 >> 2;
// x^-7
r2 ^= r0 >> 7;
r2 ^= r1 << 57;
r3 ^= r1 >> 7;
Xi[0] = r2;
Xi[1] = r3;
}
void gcm_gmult_nohw(uint64_t Xi[2], const u128 Htable[16]) {
uint64_t swapped[2];
swapped[0] = CRYPTO_bswap8(Xi[1]);
swapped[1] = CRYPTO_bswap8(Xi[0]);
gcm_polyval_nohw(swapped, &Htable[0]);
Xi[0] = CRYPTO_bswap8(swapped[1]);
Xi[1] = CRYPTO_bswap8(swapped[0]);
}
void gcm_ghash_nohw(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len) {
uint64_t swapped[2];
swapped[0] = CRYPTO_bswap8(Xi[1]);
swapped[1] = CRYPTO_bswap8(Xi[0]);
while (len >= 16) {
uint64_t block[2];
OPENSSL_memcpy(block, inp, 16);
swapped[0] ^= CRYPTO_bswap8(block[1]);
swapped[1] ^= CRYPTO_bswap8(block[0]);
gcm_polyval_nohw(swapped, &Htable[0]);
inp += 16;
len -= 16;
}
Xi[0] = CRYPTO_bswap8(swapped[1]);
Xi[1] = CRYPTO_bswap8(swapped[0]);
}
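As a minimal sketch of how the portable routines above are driven directly (illustrative only, not part of the vendored sources; the helper name is hypothetical): the hash subkey is loaded as two byte-swapped 64-bit halves, matching the note on gcm_init_nohw that |Xi| has already been byteswapped, and one 16-byte block is folded into a zero GHASH state. It relies on the same headers that gcm_nohw.c already includes.

static void example_ghash_one_block(uint8_t out[16], const uint8_t subkey[16],
                                    const uint8_t block[16]) {
  // Load the subkey as two 64-bit halves in the byte order gcm_init_nohw expects.
  uint64_t H[2];
  OPENSSL_memcpy(H, subkey, 16);
  H[0] = CRYPTO_bswap8(H[0]);
  H[1] = CRYPTO_bswap8(H[1]);
  u128 Htable[16];
  gcm_init_nohw(Htable, H);
  uint64_t Xi[2] = {0, 0};  // GHASH state starts at zero.
  gcm_ghash_nohw(Xi, Htable, block, 16);
  OPENSSL_memcpy(out, Xi, 16);
}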

View File

@@ -0,0 +1,441 @@
/* ====================================================================
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#ifndef OPENSSL_HEADER_MODES_INTERNAL_H
#define OPENSSL_HEADER_MODES_INTERNAL_H
#include <CBigNumBoringSSL_base.h>
#include <CBigNumBoringSSL_aes.h>
#include <CBigNumBoringSSL_cpu.h>
#include <stdlib.h>
#include <string.h>
#include "../../internal.h"
#if defined(__cplusplus)
extern "C" {
#endif
static inline uint32_t GETU32(const void *in) {
uint32_t v;
OPENSSL_memcpy(&v, in, sizeof(v));
return CRYPTO_bswap4(v);
}
static inline void PUTU32(void *out, uint32_t v) {
v = CRYPTO_bswap4(v);
OPENSSL_memcpy(out, &v, sizeof(v));
}
static inline size_t load_word_le(const void *in) {
size_t v;
OPENSSL_memcpy(&v, in, sizeof(v));
return v;
}
static inline void store_word_le(void *out, size_t v) {
OPENSSL_memcpy(out, &v, sizeof(v));
}
// block128_f is the type of an AES block cipher implementation.
//
// Unlike upstream OpenSSL, it and the other functions in this file hard-code
// |AES_KEY|. It is undefined in C to call a function pointer with anything
// other than the original type. Thus we either must match |block128_f| to the
// type signature of |AES_encrypt| and friends or pass in |void*| wrapper
// functions.
//
// These functions are called exclusively with AES, so we use the former.
typedef void (*block128_f)(const uint8_t in[16], uint8_t out[16],
const AES_KEY *key);
// CTR.
// ctr128_f is the type of a function that performs CTR-mode encryption.
typedef void (*ctr128_f)(const uint8_t *in, uint8_t *out, size_t blocks,
const AES_KEY *key, const uint8_t ivec[16]);
// CRYPTO_ctr128_encrypt encrypts (or decrypts, it's the same in CTR mode)
// |len| bytes from |in| to |out| using |block| in counter mode. There's no
// requirement that |len| be a multiple of any value and any partial blocks are
// stored in |ecount_buf| and |*num|, which must be zeroed before the initial
// call. The counter is a 128-bit, big-endian value in |ivec| and is
// incremented by this function.
void CRYPTO_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
uint8_t ecount_buf[16], unsigned *num,
block128_f block);
// CRYPTO_ctr128_encrypt_ctr32 acts like |CRYPTO_ctr128_encrypt| but takes
// |ctr|, a function that performs CTR mode but only deals with the lower 32
// bits of the counter. This is useful when |ctr| can be an optimised
// function.
void CRYPTO_ctr128_encrypt_ctr32(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
uint8_t ecount_buf[16], unsigned *num,
ctr128_f ctr);
// GCM.
//
// This API differs from the upstream API slightly. The |GCM128_CONTEXT| does
// not have a |key| pointer that points to the key as upstream's version does.
// Instead, every function takes a |key| parameter. This way |GCM128_CONTEXT|
// can be safely copied. Additionally, |gcm_key| is split into a separate
// struct.
typedef struct { uint64_t hi,lo; } u128;
// gmult_func multiplies |Xi| by the GCM key and writes the result back to
// |Xi|.
typedef void (*gmult_func)(uint64_t Xi[2], const u128 Htable[16]);
// ghash_func repeatedly multiplies |Xi| by the GCM key and adds in blocks from
// |inp|. The result is written back to |Xi| and the |len| argument must be a
// multiple of 16.
typedef void (*ghash_func)(uint64_t Xi[2], const u128 Htable[16],
const uint8_t *inp, size_t len);
typedef struct gcm128_key_st {
// Note the MOVBE-based, x86-64, GHASH assembly requires |H| and |Htable| to
// be the first two elements of this struct. Additionally, some assembly
// routines require a 16-byte-aligned |Htable| when hashing data, but not
// initialization. |GCM128_KEY| is not itself aligned to simplify embedding in
// |EVP_AEAD_CTX|, but |Htable|'s offset must be a multiple of 16.
u128 H;
u128 Htable[16];
gmult_func gmult;
ghash_func ghash;
block128_f block;
// use_aesni_gcm_crypt is true if this context should use the assembly
// functions |aesni_gcm_encrypt| and |aesni_gcm_decrypt| to process data.
unsigned use_aesni_gcm_crypt:1;
} GCM128_KEY;
// GCM128_CONTEXT contains state for a single GCM operation. The structure
// should be zero-initialized before use.
typedef struct {
// The following 5 names follow the names in the GCM specification
union {
uint64_t u[2];
uint32_t d[4];
uint8_t c[16];
size_t t[16 / sizeof(size_t)];
} Yi, EKi, EK0, len, Xi;
// Note that the order of |Xi| and |gcm_key| is fixed by the MOVBE-based,
// x86-64, GHASH assembly. Additionally, some assembly routines require
// |gcm_key| to be 16-byte aligned. |GCM128_KEY| is not itself aligned to
// simplify embedding in |EVP_AEAD_CTX|.
alignas(16) GCM128_KEY gcm_key;
unsigned mres, ares;
} GCM128_CONTEXT;
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
// crypto_gcm_clmul_enabled returns one if the CLMUL implementation of GCM is
// used.
int crypto_gcm_clmul_enabled(void);
#endif
// CRYPTO_ghash_init writes a precomputed table of powers of |gcm_key| to
// |out_table| and sets |*out_mult| and |*out_hash| to (potentially hardware
// accelerated) functions for performing operations in the GHASH field. If the
// AVX implementation was used |*out_is_avx| will be true.
void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
u128 *out_key, u128 out_table[16], int *out_is_avx,
const uint8_t gcm_key[16]);
// CRYPTO_gcm128_init_key initialises |gcm_key| to use |block| (typically AES)
// with the given key. |block_is_hwaes| is one if |block| is |aes_hw_encrypt|.
OPENSSL_EXPORT void CRYPTO_gcm128_init_key(GCM128_KEY *gcm_key,
const AES_KEY *key, block128_f block,
int block_is_hwaes);
// CRYPTO_gcm128_setiv sets the IV (nonce) for |ctx|. The |key| must be the
// same key that was passed to |CRYPTO_gcm128_init|.
OPENSSL_EXPORT void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const AES_KEY *key,
const uint8_t *iv, size_t iv_len);
// CRYPTO_gcm128_aad sets the authenticated data for an instance of GCM.
// This must be called before any data is encrypted. It returns one on success
// and zero otherwise.
OPENSSL_EXPORT int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad,
size_t len);
// CRYPTO_gcm128_encrypt encrypts |len| bytes from |in| to |out|. The |key|
// must be the same key that was passed to |CRYPTO_gcm128_init|. It returns one
// on success and zero otherwise.
OPENSSL_EXPORT int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
const AES_KEY *key, const uint8_t *in,
uint8_t *out, size_t len);
// CRYPTO_gcm128_decrypt decrypts |len| bytes from |in| to |out|. The |key|
// must be the same key that was passed to |CRYPTO_gcm128_init|. It returns one
// on success and zero otherwise.
OPENSSL_EXPORT int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
const AES_KEY *key, const uint8_t *in,
uint8_t *out, size_t len);
// CRYPTO_gcm128_encrypt_ctr32 encrypts |len| bytes from |in| to |out| using
// a CTR function that only handles the bottom 32 bits of the nonce, like
// |CRYPTO_ctr128_encrypt_ctr32|. The |key| must be the same key that was
// passed to |CRYPTO_gcm128_init|. It returns one on success and zero
// otherwise.
OPENSSL_EXPORT int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
const AES_KEY *key,
const uint8_t *in, uint8_t *out,
size_t len, ctr128_f stream);
// CRYPTO_gcm128_decrypt_ctr32 decrypts |len| bytes from |in| to |out| using
// a CTR function that only handles the bottom 32 bits of the nonce, like
// |CRYPTO_ctr128_encrypt_ctr32|. The |key| must be the same key that was
// passed to |CRYPTO_gcm128_init|. It returns one on success and zero
// otherwise.
OPENSSL_EXPORT int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
const AES_KEY *key,
const uint8_t *in, uint8_t *out,
size_t len, ctr128_f stream);
// CRYPTO_gcm128_finish calculates the authenticator and compares it against
// |len| bytes of |tag|. It returns one on success and zero otherwise.
OPENSSL_EXPORT int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag,
size_t len);
// CRYPTO_gcm128_tag calculates the authenticator and copies it into |tag|.
// The minimum of |len| and 16 bytes is copied into |tag|.
OPENSSL_EXPORT void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, uint8_t *tag,
size_t len);
// GCM assembly.
void gcm_init_nohw(u128 Htable[16], const uint64_t H[2]);
void gcm_gmult_nohw(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_nohw(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len);
#if !defined(OPENSSL_NO_ASM)
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GCM_FUNCREF
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len);
OPENSSL_INLINE char gcm_ssse3_capable(void) {
return (OPENSSL_ia32cap_get()[1] & (1 << (41 - 32))) != 0;
}
// |gcm_gmult_ssse3| and |gcm_ghash_ssse3| require |Htable| to be
// 16-byte-aligned, but |gcm_init_ssse3| does not.
void gcm_init_ssse3(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_ssse3(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_ssse3(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
size_t len);
#if defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_64
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
size_t len);
#define AESNI_GCM
size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi);
size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi);
#endif // OPENSSL_X86_64
#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
#endif // OPENSSL_X86
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#define GHASH_ASM_ARM
#define GCM_FUNCREF
OPENSSL_INLINE int gcm_pmull_capable(void) {
return CRYPTO_is_ARMv8_PMULL_capable();
}
void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len);
OPENSSL_INLINE int gcm_neon_capable(void) { return CRYPTO_is_NEON_capable(); }
void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len);
#elif defined(OPENSSL_PPC64LE)
#define GHASH_ASM_PPC64LE
#define GCM_FUNCREF
void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len);
#endif
#endif // OPENSSL_NO_ASM
// CBC.
// cbc128_f is the type of a function that performs CBC-mode encryption.
typedef void (*cbc128_f)(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], int enc);
// CRYPTO_cbc128_encrypt encrypts |len| bytes from |in| to |out| using the
// given IV and block cipher in CBC mode. The input need not be a multiple of
// 128 bits long, but the output will round up to the nearest 128 bit multiple,
// zero padding the input if needed. The IV will be updated on return.
void CRYPTO_cbc128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
block128_f block);
// CRYPTO_cbc128_decrypt decrypts |len| bytes from |in| to |out| using the
// given IV and block cipher in CBC mode. If |len| is not a multiple of 128
// bits then only that many bytes will be written, but a multiple of 128 bits
// is always read from |in|. The IV will be updated on return.
void CRYPTO_cbc128_decrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
block128_f block);
// OFB.
// CRYPTO_ofb128_encrypt encrypts (or decrypts, it's the same with OFB mode)
// |len| bytes from |in| to |out| using |block| in OFB mode. There's no
// requirement that |len| be a multiple of any value and any partial blocks are
// stored in |ivec| and |*num|, the latter must be zero before the initial
// call.
void CRYPTO_ofb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], unsigned *num,
block128_f block);
// CFB.
// CRYPTO_cfb128_encrypt encrypts (or decrypts, if |enc| is zero) |len| bytes
// from |in| to |out| using |block| in CFB mode. There's no requirement that
// |len| be a multiple of any value and any partial blocks are stored in |ivec|
// and |*num|, the latter must be zero before the initial call.
void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], unsigned *num,
int enc, block128_f block);
// CRYPTO_cfb128_8_encrypt encrypts (or decrypts, if |enc| is zero) |len| bytes
// from |in| to |out| using |block| in CFB-8 mode. Prior to the first call
// |num| should be set to zero.
void CRYPTO_cfb128_8_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
unsigned *num, int enc, block128_f block);
// CRYPTO_cfb128_1_encrypt encrypts (or decrypts, if |enc| is zero) |len| bytes
// from |in| to |out| using |block| in CFB-1 mode. Prior to the first call
// |num| should be set to zero.
void CRYPTO_cfb128_1_encrypt(const uint8_t *in, uint8_t *out, size_t bits,
const AES_KEY *key, uint8_t ivec[16],
unsigned *num, int enc, block128_f block);
size_t CRYPTO_cts128_encrypt_block(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
block128_f block);
// POLYVAL.
//
// POLYVAL is a polynomial authenticator that operates over a field very
// similar to the one that GHASH uses. See
// https://tools.ietf.org/html/draft-irtf-cfrg-gcmsiv-02#section-3.
typedef union {
uint64_t u[2];
uint8_t c[16];
} polyval_block;
struct polyval_ctx {
// Note that the order of |S|, |H| and |Htable| is fixed by the MOVBE-based,
// x86-64, GHASH assembly. Additionally, some assembly routines require
// |Htable| to be 16-byte aligned.
polyval_block S;
u128 H;
alignas(16) u128 Htable[16];
gmult_func gmult;
ghash_func ghash;
};
// CRYPTO_POLYVAL_init initialises |ctx| using |key|.
void CRYPTO_POLYVAL_init(struct polyval_ctx *ctx, const uint8_t key[16]);
// CRYPTO_POLYVAL_update_blocks updates the accumulator in |ctx| given the
// blocks from |in|. Only a whole number of blocks can be processed so |in_len|
// must be a multiple of 16.
void CRYPTO_POLYVAL_update_blocks(struct polyval_ctx *ctx, const uint8_t *in,
size_t in_len);
// CRYPTO_POLYVAL_finish writes the accumulator from |ctx| to |out|.
void CRYPTO_POLYVAL_finish(const struct polyval_ctx *ctx, uint8_t out[16]);
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_MODES_INTERNAL_H
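A minimal usage sketch for the CTR interface declared above (illustrative only, not part of the vendored sources; the wrapper name is hypothetical). It follows the documented contract: |ecount_buf| and |*num| are zeroed before the first call, |ivec| holds the big-endian counter and is updated in place, and the vendored AES_encrypt serves as the block128_f.

#include <CBigNumBoringSSL_aes.h>
#include "internal.h"
static void example_ctr128_crypt(const AES_KEY *aes, uint8_t ivec[16],
                                 const uint8_t *in, uint8_t *out, size_t len) {
  uint8_t ecount_buf[16] = {0};
  unsigned num = 0;
  // CTR encryption and decryption are the same operation.
  CRYPTO_ctr128_encrypt(in, out, len, aes, ivec, ecount_buf, &num, AES_encrypt);
}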

View File

@@ -0,0 +1,96 @@
/* ====================================================================
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <CBigNumBoringSSL_type_check.h>
#include <assert.h>
#include <string.h>
#include "internal.h"
OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
"block cannot be divided into size_t");
void CRYPTO_ofb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], unsigned *num,
block128_f block) {
assert(in && out && key && ivec && num);
unsigned n = *num;
while (n && len) {
*(out++) = *(in++) ^ ivec[n];
--len;
n = (n + 1) % 16;
}
while (len >= 16) {
(*block)(ivec, ivec, key);
for (; n < 16; n += sizeof(size_t)) {
size_t a, b;
OPENSSL_memcpy(&a, in + n, sizeof(size_t));
OPENSSL_memcpy(&b, ivec + n, sizeof(size_t));
const size_t c = a ^ b;
OPENSSL_memcpy(out + n, &c, sizeof(size_t));
}
len -= 16;
out += 16;
in += 16;
n = 0;
}
if (len) {
(*block)(ivec, ivec, key);
while (len--) {
out[n] = in[n] ^ ivec[n];
++n;
}
}
*num = n;
}
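A matching usage sketch for CRYPTO_ofb128_encrypt (illustrative only, not part of the vendored sources; the wrapper name is hypothetical). As the header comment notes, OFB encryption and decryption are the same operation; |num| must be zero before the initial call, and |ivec| together with |*num| carries the keystream position between calls. It relies on the headers already included by ofb.c.

static void example_ofb128_crypt(const AES_KEY *aes, uint8_t ivec[16],
                                 const uint8_t *in, uint8_t *out, size_t len) {
  unsigned num = 0;
  CRYPTO_ofb128_encrypt(in, out, len, aes, ivec, &num, AES_encrypt);
}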

Some files were not shown because too many files have changed in this diff.