Use BoringSSL implementation of BIGNUM (#2)

Vendor a limited section of BoringSSL instead of depending on the OpenSSL libs.
The main reason for doing this is that the LibreSSL BIGNUM implementation doesn't always generate the correct values.
Vendoring is based on the swift-crypto implementation.
Remove support for Swift 5.0
Adam Fowler 2020-05-06 18:11:24 +01:00 committed by GitHub
parent 7aa8e7c67d
commit f00adf94f4
205 changed files with 165846 additions and 85 deletions


@@ -18,7 +18,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
tag: ['5.0', '5.1', '5.2']
tag: ['5.1', '5.2']
container:
image: swift:${{ matrix.tag }}
steps:

.gitignore

@@ -4,3 +4,4 @@
/Packages
/*.xcodeproj
xcuserdata/
/.boringssl


@@ -1,4 +1,4 @@
// swift-tools-version:5.0
// swift-tools-version:5.1
// The swift-tools-version declares the minimum version of Swift required to build this package.
import PackageDescription
@@ -8,22 +8,14 @@ let package = Package(
products: [
// Products define the executables and libraries produced by a package, and make them visible to other packages.
.library(name: "BigNum", targets: ["BigNum"]),
/* This target is used only for symbol mangling. It's added and removed automatically because it emits build warnings. MANGLE_START
.library(name: "CBigNumBoringSSL", type: .static, targets: ["CBigNumBoringSSL"]),
MANGLE_END */
],
dependencies: [
// Dependencies declare other packages that this package depends on.
// .package(url: /* package url */, from: "1.0.0"),
],
dependencies: [],
targets: [
.target(name: "BigNum", dependencies: ["CBigNum"]),
.target(name: "CBigNum", dependencies: ["CBigNumOpenSSL"]),
.systemLibrary(
name: "CBigNumOpenSSL",
pkgConfig: "openssl",
providers: [
.apt(["openssl libssl-dev"]),
.brew(["openssl"])
]
),
.target(name: "BigNum", dependencies: ["CBigNumBoringSSL"]),
.target(name: "CBigNumBoringSSL"),
.testTarget(name: "BigNumTests", dependencies: ["BigNum"]),
]
)
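Nothing changes for packages that consume this library: they still depend only on the `BigNum` product, and the system OpenSSL requirement (pkg-config `openssl`, apt `libssl-dev`, brew `openssl`) disappears because `CBigNumBoringSSL` is built from the vendored sources. A minimal sketch of a consumer manifest, with a placeholder package name, URL and version:

// swift-tools-version:5.1
import PackageDescription

let package = Package(
    name: "MyApp",
    dependencies: [
        // Placeholder URL and version; point these at the real BigNum package.
        .package(url: "https://github.com/example/big-num.git", from: "1.0.0"),
    ],
    targets: [
        // Depending on the "BigNum" product is all that is required; the
        // vendored CBigNumBoringSSL target stays an internal detail.
        .target(name: "MyApp", dependencies: ["BigNum"]),
    ]
)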


@@ -4,51 +4,50 @@
/// Inspired by the implementation here https://github.com/Bouke/Bignum
///
import CBigNum
@_implementationOnly import CBigNumBoringSSL
import Foundation
/// Swift wrapper class for BIGNUM functions in OpenSSL library
public final class BigNum {
// ctx is an `OpaquePointer` because in OpenSSL 1.1 `BIGNUM` is an incomplete type. Still have to jump
// through hoops though because in other builds it is complete type and the compiler complains about
// casting to and from an OpaquePointer
// ctx is an `OpaquePointer` because CBigNumBoringSSL is imported as implementation-only. We still need
// to convert back and forth between `OpaquePointer` and `UnsafeMutablePointer<BIGNUM>`, though
internal let ctx: OpaquePointer?
public init() {
ctx = BN_new().convert()
ctx = CBigNumBoringSSL_BN_new().convert()
}
public init(_ int: Int) {
let ctx = BN_new()
let ctx = CBigNumBoringSSL_BN_new()
withUnsafePointer(to: int.bigEndian) { bytes in
let raw = UnsafeRawPointer(bytes)
let p = raw.bindMemory(to: UInt8.self, capacity: MemoryLayout<Int>.size)
BN_bin2bn(p, Int32(MemoryLayout<Int>.size), ctx)
CBigNumBoringSSL_BN_bin2bn(p, Int(MemoryLayout<Int>.size), ctx)
}
self.ctx = ctx!.convert()
}
public init?(_ dec: String) {
var ctx = BN_new()
if BN_dec2bn(&ctx, dec) == 0 {
var ctx = CBigNumBoringSSL_BN_new()
if CBigNumBoringSSL_BN_dec2bn(&ctx, dec) == 0 {
return nil
}
self.ctx = ctx!.convert()
}
public init?(hex: String) {
var ctx = BN_new()
if BN_hex2bn(&ctx, hex) == 0 {
var ctx = CBigNumBoringSSL_BN_new()
if CBigNumBoringSSL_BN_hex2bn(&ctx, hex) == 0 {
return nil
}
self.ctx = ctx!.convert()
}
public init<D: ContiguousBytes>(bytes: D) {
let ctx = BN_new()
let ctx = CBigNumBoringSSL_BN_new()
bytes.withUnsafeBytes { bytes in
if let p = bytes.baseAddress?.assumingMemoryBound(to: UInt8.self) {
BN_bin2bn(p, .init(bytes.count), ctx)
CBigNumBoringSSL_BN_bin2bn(p, .init(bytes.count), ctx)
}
}
self.ctx = ctx!.convert()
@@ -56,48 +55,48 @@ public final class BigNum {
@available(*, deprecated, message: "Please use init(bytes:) instead")
public init<D: DataProtocol>(data: D) {
let ctx = BN_new()
let ctx = CBigNumBoringSSL_BN_new()
if data.withContiguousStorageIfAvailable({bytes in
BN_bin2bn(bytes.baseAddress, .init(data.count), ctx)
CBigNumBoringSSL_BN_bin2bn(bytes.baseAddress, .init(data.count), ctx)
}) == nil {
var buffer = UnsafeMutableBufferPointer<UInt8>.allocate(capacity: data.count)
data.copyBytes(to: buffer)
defer { buffer.deallocate() }
BN_bin2bn(buffer.baseAddress, .init(data.count), ctx)
CBigNumBoringSSL_BN_bin2bn(buffer.baseAddress, .init(data.count), ctx)
}
self.ctx = ctx!.convert()
}
deinit {
BN_free(ctx?.convert())
CBigNumBoringSSL_BN_free(ctx?.convert())
}
public var data: Data {
var data = Data(count: Int((BN_num_bits(ctx?.convert()) + 7) / 8))
var data = Data(count: Int((CBigNumBoringSSL_BN_num_bits(ctx?.convert()) + 7) / 8))
_ = data.withUnsafeMutableBytes { bytes in
if let p = bytes.baseAddress?.assumingMemoryBound(to: UInt8.self) {
BN_bn2bin(ctx?.convert(), p)
CBigNumBoringSSL_BN_bn2bin(ctx?.convert(), p)
}
}
return data
}
public var bytes: [UInt8] {
var bytes = [UInt8].init(repeating: 0, count: Int((BN_num_bits(ctx?.convert()) + 7) / 8))
var bytes = [UInt8].init(repeating: 0, count: Int((CBigNumBoringSSL_BN_num_bits(ctx?.convert()) + 7) / 8))
_ = bytes.withUnsafeMutableBytes { bytes in
if let p = bytes.baseAddress?.assumingMemoryBound(to: UInt8.self) {
BN_bn2bin(ctx?.convert(), p)
CBigNumBoringSSL_BN_bn2bin(ctx?.convert(), p)
}
}
return bytes
}
public var dec: String {
return String(validatingUTF8: BN_bn2dec(ctx?.convert()))!
return String(validatingUTF8: CBigNumBoringSSL_BN_bn2dec(ctx?.convert()))!
}
public var hex: String {
return String(validatingUTF8: BN_bn2hex(ctx?.convert()))!
return String(validatingUTF8: CBigNumBoringSSL_BN_bn2hex(ctx?.convert()))!
}
}
@@ -109,11 +108,11 @@ extension BigNum: CustomStringConvertible {
extension BigNum: Comparable {
public static func == (lhs: BigNum, rhs: BigNum) -> Bool {
return BN_cmp(lhs.ctx?.convert(), rhs.ctx?.convert()) == 0
return CBigNumBoringSSL_BN_cmp(lhs.ctx?.convert(), rhs.ctx?.convert()) == 0
}
public static func < (lhs: BigNum, rhs: BigNum) -> Bool {
return BN_cmp(lhs.ctx?.convert(), rhs.ctx?.convert()) == -1
return CBigNumBoringSSL_BN_cmp(lhs.ctx?.convert(), rhs.ctx?.convert()) == -1
}
}
@@ -136,56 +135,56 @@ extension BigNum {
static func operationWithCtx(_ block: (BigNum, OpaquePointer?) -> Int32) -> BigNum {
let result = BigNum()
let context = BN_CTX_new()
let context = CBigNumBoringSSL_BN_CTX_new()
precondition(block(result, context) == 1)
BN_CTX_free(context)
CBigNumBoringSSL_BN_CTX_free(context)
return result
}
}
public func + (lhs: BigNum, rhs: BigNum) -> BigNum {
return BigNum.operation {
BN_add($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
CBigNumBoringSSL_BN_add($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
}
}
public func - (lhs: BigNum, rhs: BigNum) -> BigNum {
return BigNum.operation {
BN_sub($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
CBigNumBoringSSL_BN_sub($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
}
}
public func * (lhs: BigNum, rhs: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_mul($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
CBigNumBoringSSL_BN_mul($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
}
}
/// Returns lhs / rhs, rounded to zero.
public func / (lhs: BigNum, rhs: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_div($0.ctx?.convert(), nil, lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
CBigNumBoringSSL_BN_div($0.ctx?.convert(), nil, lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
}
}
/// Returns the remainder of lhs divided by rhs.
public func % (lhs: BigNum, rhs: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_div(nil, $0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
CBigNumBoringSSL_BN_div(nil, $0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
}
}
/// right shift
public func >> (lhs: BigNum, shift: Int32) -> BigNum {
return BigNum.operation {
BN_rshift($0.ctx?.convert(), lhs.ctx?.convert(), shift)
CBigNumBoringSSL_BN_rshift($0.ctx?.convert(), lhs.ctx?.convert(), shift)
}
}
/// left shift
public func << (lhs: BigNum, shift: Int32) -> BigNum {
return BigNum.operation {
BN_lshift($0.ctx?.convert(), lhs.ctx?.convert(), shift)
CBigNumBoringSSL_BN_lshift($0.ctx?.convert(), lhs.ctx?.convert(), shift)
}
}
@@ -195,111 +194,111 @@ public extension BigNum {
static func += (lhs: inout BigNum, rhs: BigNum) {
lhs = BigNum.operation {
BN_add($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
CBigNumBoringSSL_BN_add($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
}
}
static func -= (lhs: inout BigNum, rhs: BigNum) {
lhs = BigNum.operation {
BN_sub($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
CBigNumBoringSSL_BN_sub($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert())
}
}
static func *= (lhs: inout BigNum, rhs: BigNum) {
lhs = BigNum.operationWithCtx {
BN_mul($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
CBigNumBoringSSL_BN_mul($0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
}
}
static func /= (lhs: inout BigNum, rhs: BigNum) {
lhs = BigNum.operationWithCtx {
BN_div($0.ctx?.convert(), nil, lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
CBigNumBoringSSL_BN_div($0.ctx?.convert(), nil, lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
}
}
static func %= (lhs: inout BigNum, rhs: BigNum) {
lhs = BigNum.operationWithCtx {
BN_div(nil, $0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
CBigNumBoringSSL_BN_div(nil, $0.ctx?.convert(), lhs.ctx?.convert(), rhs.ctx?.convert(), $1)
}
}
/// Returns: (self ** 2)
func sqr() -> BigNum {
return BigNum.operationWithCtx {
BN_sqr($0.ctx?.convert(), self.ctx?.convert(), $1)
CBigNumBoringSSL_BN_sqr($0.ctx?.convert(), self.ctx?.convert(), $1)
}
}
/// Returns: (self ** p)
func power(_ p: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_exp($0.ctx?.convert(), self.ctx?.convert(), p.ctx?.convert(), $1)
CBigNumBoringSSL_BN_exp($0.ctx?.convert(), self.ctx?.convert(), p.ctx?.convert(), $1)
}
}
/// Returns: (self + b) % N
func add(_ b: BigNum, modulus: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_mod_add($0.ctx?.convert(), self.ctx?.convert(), b.ctx?.convert(), modulus.ctx?.convert(), $1)
CBigNumBoringSSL_BN_mod_add($0.ctx?.convert(), self.ctx?.convert(), b.ctx?.convert(), modulus.ctx?.convert(), $1)
}
}
/// Returns: (a - b) % N
func sub(_ b: BigNum, modulus: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_mod_sub($0.ctx?.convert(), self.ctx?.convert(), b.ctx?.convert(), modulus.ctx?.convert(), $1)
CBigNumBoringSSL_BN_mod_sub($0.ctx?.convert(), self.ctx?.convert(), b.ctx?.convert(), modulus.ctx?.convert(), $1)
}
}
/// Returns: (a * b) % N
func mul(_ b: BigNum, modulus: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_mod_mul($0.ctx?.convert(), self.ctx?.convert(), b.ctx?.convert(), modulus.ctx?.convert(), $1)
CBigNumBoringSSL_BN_mod_mul($0.ctx?.convert(), self.ctx?.convert(), b.ctx?.convert(), modulus.ctx?.convert(), $1)
}
}
/// Returns: (a ** 2) % N
func sqr(modulus: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_mod_sqr($0.ctx?.convert(), self.ctx?.convert(), modulus.ctx?.convert(), $1)
CBigNumBoringSSL_BN_mod_sqr($0.ctx?.convert(), self.ctx?.convert(), modulus.ctx?.convert(), $1)
}
}
/// Returns: (a ** p) % N
func power(_ p: BigNum, modulus: BigNum) -> BigNum {
return BigNum.operationWithCtx {
BN_mod_exp($0.ctx?.convert(), self.ctx?.convert(), p.ctx?.convert(), modulus.ctx?.convert(), $1)
CBigNumBoringSSL_BN_mod_exp($0.ctx?.convert(), self.ctx?.convert(), p.ctx?.convert(), modulus.ctx?.convert(), $1)
}
}
/// Returns the greatest common divisor
static func gcd(_ first: BigNum, _ second: BigNum) -> BigNum {
return operationWithCtx {
BN_gcd($0.ctx?.convert(), first.ctx?.convert(), second.ctx?.convert(), $1)
CBigNumBoringSSL_BN_gcd($0.ctx?.convert(), first.ctx?.convert(), second.ctx?.convert(), $1)
}
}
/// Bitwise operations
func setBit(_ bit: Int32) {
BN_set_bit(self.ctx?.convert(), bit)
CBigNumBoringSSL_BN_set_bit(self.ctx?.convert(), bit)
}
func clearBit(_ bit: Int32) {
BN_clear_bit(self.ctx?.convert(), bit)
CBigNumBoringSSL_BN_clear_bit(self.ctx?.convert(), bit)
}
func mask(_ bits: Int32) {
BN_mask_bits(self.ctx?.convert(), bits)
CBigNumBoringSSL_BN_mask_bits(self.ctx?.convert(), bits)
}
func isBitSet(_ bit: Int32) -> Bool {
let set = BN_is_bit_set(self.ctx?.convert(), bit)
let set = CBigNumBoringSSL_BN_is_bit_set(self.ctx?.convert(), bit)
return set == 1 ? true : false
}
func numBits() -> Int32 {
return BN_num_bits(self.ctx?.convert())
func numBits() -> UInt32 {
return CBigNumBoringSSL_BN_num_bits(self.ctx?.convert())
}
/// random number generators
@@ -313,45 +312,45 @@ public extension BigNum {
/// Returns a cryptographically strong random number of at most `bits` bits. The random number generator needs to be seeded before calling this.
static func random(bits: Int32, top: Top = .any, odd: Bool = false) -> BigNum {
return operation {
BN_rand($0.ctx?.convert(), bits, top.rawValue, odd ? 1 : 0)
CBigNumBoringSSL_BN_rand($0.ctx?.convert(), bits, top.rawValue, odd ? 1 : 0)
}
}
/// return pseudo random number of maximum size defined in bits.
static func psuedo_random(bits: Int32, top: Top = .any, odd: Bool = false) -> BigNum {
return operation {
BN_pseudo_rand($0.ctx?.convert(), bits, top.rawValue, odd ? 1 : 0)
CBigNumBoringSSL_BN_pseudo_rand($0.ctx?.convert(), bits, top.rawValue, odd ? 1 : 0)
}
}
/// Returns a cryptographically strong random number in the range (0...max-1). The random number generator needs to be seeded before calling this.
static func random(max: BigNum) -> BigNum {
return operation {
BN_rand_range($0.ctx?.convert(), max.ctx?.convert())
CBigNumBoringSSL_BN_rand_range($0.ctx?.convert(), max.ctx?.convert())
}
}
/// return pseudo random number in range (0..<max)
static func psuedo_random(max: BigNum) -> BigNum {
return operation {
BN_pseudo_rand_range($0.ctx?.convert(), max.ctx?.convert())
CBigNumBoringSSL_BN_pseudo_rand_range($0.ctx?.convert(), max.ctx?.convert())
}
}
/// prime number generator
static func generatePrime(bitSize: Int32, safe: Bool, add: BigNum? = nil, remainder: BigNum? = nil) -> BigNum {
return operation {
BN_generate_prime_ex($0.ctx?.convert(), bitSize, safe ? 1 : 0, add?.ctx?.convert(), remainder?.ctx?.convert(), nil)
CBigNumBoringSSL_BN_generate_prime_ex($0.ctx?.convert(), bitSize, safe ? 1 : 0, add?.ctx?.convert(), remainder?.ctx?.convert(), nil)
}
}
/// primality test
func isPrime(numChecks: Int32) -> Bool {
let context = BN_CTX_new()
let context = CBigNumBoringSSL_BN_CTX_new()
defer {
BN_CTX_free(context)
CBigNumBoringSSL_BN_CTX_free(context)
}
return BN_is_prime_ex(self.ctx?.convert(), numChecks, context, nil) == 1
return CBigNumBoringSSL_BN_is_prime_ex(self.ctx?.convert(), numChecks, context, nil) == 1
}
}
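The comment near the top of this file refers to converting between `OpaquePointer` and `UnsafeMutablePointer<BIGNUM>`. A minimal sketch of what such `convert()` helpers can look like (hypothetical; the package's actual helpers may differ slightly), using only the standard library's pointer initializers:

extension OpaquePointer {
    // Reinterpret the opaque handle as a typed pointer; the destination
    // type (e.g. BIGNUM) is inferred at the call site.
    func convert<T>() -> UnsafeMutablePointer<T> {
        return UnsafeMutablePointer<T>(self)
    }
}

extension UnsafeMutablePointer {
    // Wrap a typed pointer back up as an opaque handle for storage.
    func convert() -> OpaquePointer {
        return OpaquePointer(self)
    }
}

With these in place, a call such as `CBigNumBoringSSL_BN_free(ctx?.convert())` type-checks because the compiler infers `UnsafeMutablePointer<BIGNUM>` from the parameter type that `BN_free` expects.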


@@ -1 +0,0 @@
#include "include/c_big_num.h"


@@ -1 +0,0 @@
#include <openssl/bn.h>


@@ -0,0 +1,700 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bio.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <string.h>
#include <CBigNumBoringSSL_asn1.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_thread.h>
#include "../internal.h"
BIO *BIO_new(const BIO_METHOD *method) {
BIO *ret = OPENSSL_malloc(sizeof(BIO));
if (ret == NULL) {
OPENSSL_PUT_ERROR(BIO, ERR_R_MALLOC_FAILURE);
return NULL;
}
OPENSSL_memset(ret, 0, sizeof(BIO));
ret->method = method;
ret->shutdown = 1;
ret->references = 1;
if (method->create != NULL && !method->create(ret)) {
OPENSSL_free(ret);
return NULL;
}
return ret;
}
int BIO_free(BIO *bio) {
BIO *next_bio;
for (; bio != NULL; bio = next_bio) {
if (!CRYPTO_refcount_dec_and_test_zero(&bio->references)) {
return 0;
}
next_bio = BIO_pop(bio);
if (bio->method != NULL && bio->method->destroy != NULL) {
bio->method->destroy(bio);
}
OPENSSL_free(bio);
}
return 1;
}
int BIO_up_ref(BIO *bio) {
CRYPTO_refcount_inc(&bio->references);
return 1;
}
void BIO_vfree(BIO *bio) {
BIO_free(bio);
}
void BIO_free_all(BIO *bio) {
BIO_free(bio);
}
int BIO_read(BIO *bio, void *buf, int len) {
if (bio == NULL || bio->method == NULL || bio->method->bread == NULL) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
return -2;
}
if (!bio->init) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNINITIALIZED);
return -2;
}
if (len <= 0) {
return 0;
}
int ret = bio->method->bread(bio, buf, len);
if (ret > 0) {
bio->num_read += ret;
}
return ret;
}
int BIO_gets(BIO *bio, char *buf, int len) {
if (bio == NULL || bio->method == NULL || bio->method->bgets == NULL) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
return -2;
}
if (!bio->init) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNINITIALIZED);
return -2;
}
if (len <= 0) {
return 0;
}
int ret = bio->method->bgets(bio, buf, len);
if (ret > 0) {
bio->num_read += ret;
}
return ret;
}
int BIO_write(BIO *bio, const void *in, int inl) {
if (bio == NULL || bio->method == NULL || bio->method->bwrite == NULL) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
return -2;
}
if (!bio->init) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNINITIALIZED);
return -2;
}
if (inl <= 0) {
return 0;
}
int ret = bio->method->bwrite(bio, in, inl);
if (ret > 0) {
bio->num_write += ret;
}
return ret;
}
int BIO_write_all(BIO *bio, const void *data, size_t len) {
const uint8_t *data_u8 = data;
while (len > 0) {
int ret = BIO_write(bio, data_u8, len > INT_MAX ? INT_MAX : (int)len);
if (ret <= 0) {
return 0;
}
data_u8 += ret;
len -= ret;
}
return 1;
}
int BIO_puts(BIO *bio, const char *in) {
return BIO_write(bio, in, strlen(in));
}
int BIO_flush(BIO *bio) {
return BIO_ctrl(bio, BIO_CTRL_FLUSH, 0, NULL);
}
long BIO_ctrl(BIO *bio, int cmd, long larg, void *parg) {
if (bio == NULL) {
return 0;
}
if (bio->method == NULL || bio->method->ctrl == NULL) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
return -2;
}
return bio->method->ctrl(bio, cmd, larg, parg);
}
char *BIO_ptr_ctrl(BIO *b, int cmd, long larg) {
char *p = NULL;
if (BIO_ctrl(b, cmd, larg, (void *)&p) <= 0) {
return NULL;
}
return p;
}
long BIO_int_ctrl(BIO *b, int cmd, long larg, int iarg) {
int i = iarg;
return BIO_ctrl(b, cmd, larg, (void *)&i);
}
int BIO_reset(BIO *bio) {
return BIO_ctrl(bio, BIO_CTRL_RESET, 0, NULL);
}
int BIO_eof(BIO *bio) {
return BIO_ctrl(bio, BIO_CTRL_EOF, 0, NULL);
}
void BIO_set_flags(BIO *bio, int flags) {
bio->flags |= flags;
}
int BIO_test_flags(const BIO *bio, int flags) {
return bio->flags & flags;
}
int BIO_should_read(const BIO *bio) {
return BIO_test_flags(bio, BIO_FLAGS_READ);
}
int BIO_should_write(const BIO *bio) {
return BIO_test_flags(bio, BIO_FLAGS_WRITE);
}
int BIO_should_retry(const BIO *bio) {
return BIO_test_flags(bio, BIO_FLAGS_SHOULD_RETRY);
}
int BIO_should_io_special(const BIO *bio) {
return BIO_test_flags(bio, BIO_FLAGS_IO_SPECIAL);
}
int BIO_get_retry_reason(const BIO *bio) { return bio->retry_reason; }
void BIO_clear_flags(BIO *bio, int flags) {
bio->flags &= ~flags;
}
void BIO_set_retry_read(BIO *bio) {
bio->flags |= BIO_FLAGS_READ | BIO_FLAGS_SHOULD_RETRY;
}
void BIO_set_retry_write(BIO *bio) {
bio->flags |= BIO_FLAGS_WRITE | BIO_FLAGS_SHOULD_RETRY;
}
static const int kRetryFlags = BIO_FLAGS_RWS | BIO_FLAGS_SHOULD_RETRY;
int BIO_get_retry_flags(BIO *bio) {
return bio->flags & kRetryFlags;
}
void BIO_clear_retry_flags(BIO *bio) {
bio->flags &= ~kRetryFlags;
bio->retry_reason = 0;
}
int BIO_method_type(const BIO *bio) { return bio->method->type; }
void BIO_copy_next_retry(BIO *bio) {
BIO_clear_retry_flags(bio);
BIO_set_flags(bio, BIO_get_retry_flags(bio->next_bio));
bio->retry_reason = bio->next_bio->retry_reason;
}
long BIO_callback_ctrl(BIO *bio, int cmd, bio_info_cb fp) {
if (bio == NULL) {
return 0;
}
if (bio->method == NULL || bio->method->callback_ctrl == NULL) {
OPENSSL_PUT_ERROR(BIO, BIO_R_UNSUPPORTED_METHOD);
return 0;
}
return bio->method->callback_ctrl(bio, cmd, fp);
}
size_t BIO_pending(const BIO *bio) {
const long r = BIO_ctrl((BIO *) bio, BIO_CTRL_PENDING, 0, NULL);
assert(r >= 0);
if (r < 0) {
return 0;
}
return r;
}
size_t BIO_ctrl_pending(const BIO *bio) {
return BIO_pending(bio);
}
size_t BIO_wpending(const BIO *bio) {
const long r = BIO_ctrl((BIO *) bio, BIO_CTRL_WPENDING, 0, NULL);
assert(r >= 0);
if (r < 0) {
return 0;
}
return r;
}
int BIO_set_close(BIO *bio, int close_flag) {
return BIO_ctrl(bio, BIO_CTRL_SET_CLOSE, close_flag, NULL);
}
OPENSSL_EXPORT size_t BIO_number_read(const BIO *bio) {
return bio->num_read;
}
OPENSSL_EXPORT size_t BIO_number_written(const BIO *bio) {
return bio->num_write;
}
BIO *BIO_push(BIO *bio, BIO *appended_bio) {
BIO *last_bio;
if (bio == NULL) {
return bio;
}
last_bio = bio;
while (last_bio->next_bio != NULL) {
last_bio = last_bio->next_bio;
}
last_bio->next_bio = appended_bio;
return bio;
}
BIO *BIO_pop(BIO *bio) {
BIO *ret;
if (bio == NULL) {
return NULL;
}
ret = bio->next_bio;
bio->next_bio = NULL;
return ret;
}
BIO *BIO_next(BIO *bio) {
if (!bio) {
return NULL;
}
return bio->next_bio;
}
BIO *BIO_find_type(BIO *bio, int type) {
int method_type, mask;
if (!bio) {
return NULL;
}
mask = type & 0xff;
do {
if (bio->method != NULL) {
method_type = bio->method->type;
if (!mask) {
if (method_type & type) {
return bio;
}
} else if (method_type == type) {
return bio;
}
}
bio = bio->next_bio;
} while (bio != NULL);
return NULL;
}
int BIO_indent(BIO *bio, unsigned indent, unsigned max_indent) {
if (indent > max_indent) {
indent = max_indent;
}
while (indent--) {
if (BIO_puts(bio, " ") != 1) {
return 0;
}
}
return 1;
}
static int print_bio(const char *str, size_t len, void *bio) {
return BIO_write((BIO *)bio, str, len);
}
void ERR_print_errors(BIO *bio) {
ERR_print_errors_cb(print_bio, bio);
}
// bio_read_all reads everything from |bio| and prepends |prefix| to it. On
// success, |*out| is set to an allocated buffer (which should be freed with
// |OPENSSL_free|), |*out_len| is set to its length and one is returned. The
// buffer will contain |prefix| followed by the contents of |bio|. On failure,
// zero is returned.
//
// The function will fail if the size of the output would equal or exceed
// |max_len|.
static int bio_read_all(BIO *bio, uint8_t **out, size_t *out_len,
const uint8_t *prefix, size_t prefix_len,
size_t max_len) {
static const size_t kChunkSize = 4096;
size_t len = prefix_len + kChunkSize;
if (len > max_len) {
len = max_len;
}
if (len < prefix_len) {
return 0;
}
*out = OPENSSL_malloc(len);
if (*out == NULL) {
return 0;
}
OPENSSL_memcpy(*out, prefix, prefix_len);
size_t done = prefix_len;
for (;;) {
if (done == len) {
OPENSSL_free(*out);
return 0;
}
const size_t todo = len - done;
assert(todo < INT_MAX);
const int n = BIO_read(bio, *out + done, todo);
if (n == 0) {
*out_len = done;
return 1;
} else if (n == -1) {
OPENSSL_free(*out);
return 0;
}
done += n;
if (len < max_len && len - done < kChunkSize / 2) {
len += kChunkSize;
if (len < kChunkSize || len > max_len) {
len = max_len;
}
uint8_t *new_buf = OPENSSL_realloc(*out, len);
if (new_buf == NULL) {
OPENSSL_free(*out);
return 0;
}
*out = new_buf;
}
}
}
// bio_read_full reads |len| bytes |bio| and writes them into |out|. It
// tolerates partial reads from |bio| and returns one on success or zero if a
// read fails before |len| bytes are read. On failure, it additionally sets
// |*out_eof_on_first_read| to whether the error was due to |bio| returning zero
// on the first read. |out_eof_on_first_read| may be NULL to discard the value.
static int bio_read_full(BIO *bio, uint8_t *out, int *out_eof_on_first_read,
size_t len) {
int first_read = 1;
while (len > 0) {
int todo = len <= INT_MAX ? (int)len : INT_MAX;
int ret = BIO_read(bio, out, todo);
if (ret <= 0) {
if (out_eof_on_first_read != NULL) {
*out_eof_on_first_read = first_read && ret == 0;
}
return 0;
}
out += ret;
len -= (size_t)ret;
first_read = 0;
}
return 1;
}
// For compatibility with existing |d2i_*_bio| callers, |BIO_read_asn1| uses
// |ERR_LIB_ASN1| errors.
OPENSSL_DECLARE_ERROR_REASON(ASN1, ASN1_R_DECODE_ERROR)
OPENSSL_DECLARE_ERROR_REASON(ASN1, ASN1_R_HEADER_TOO_LONG)
OPENSSL_DECLARE_ERROR_REASON(ASN1, ASN1_R_NOT_ENOUGH_DATA)
OPENSSL_DECLARE_ERROR_REASON(ASN1, ASN1_R_TOO_LONG)
int BIO_read_asn1(BIO *bio, uint8_t **out, size_t *out_len, size_t max_len) {
uint8_t header[6];
static const size_t kInitialHeaderLen = 2;
int eof_on_first_read;
if (!bio_read_full(bio, header, &eof_on_first_read, kInitialHeaderLen)) {
if (eof_on_first_read) {
// Historically, OpenSSL returned |ASN1_R_HEADER_TOO_LONG| when
// |d2i_*_bio| could not read anything. CPython conditions on this to
// determine if |bio| was empty.
OPENSSL_PUT_ERROR(ASN1, ASN1_R_HEADER_TOO_LONG);
} else {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_NOT_ENOUGH_DATA);
}
return 0;
}
const uint8_t tag = header[0];
const uint8_t length_byte = header[1];
if ((tag & 0x1f) == 0x1f) {
// Long form tags are not supported.
OPENSSL_PUT_ERROR(ASN1, ASN1_R_DECODE_ERROR);
return 0;
}
size_t len, header_len;
if ((length_byte & 0x80) == 0) {
// Short form length.
len = length_byte;
header_len = kInitialHeaderLen;
} else {
const size_t num_bytes = length_byte & 0x7f;
if ((tag & 0x20 /* constructed */) != 0 && num_bytes == 0) {
// indefinite length.
if (!bio_read_all(bio, out, out_len, header, kInitialHeaderLen,
max_len)) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_NOT_ENOUGH_DATA);
return 0;
}
return 1;
}
if (num_bytes == 0 || num_bytes > 4) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_DECODE_ERROR);
return 0;
}
if (!bio_read_full(bio, header + kInitialHeaderLen, NULL, num_bytes)) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_NOT_ENOUGH_DATA);
return 0;
}
header_len = kInitialHeaderLen + num_bytes;
uint32_t len32 = 0;
for (unsigned i = 0; i < num_bytes; i++) {
len32 <<= 8;
len32 |= header[kInitialHeaderLen + i];
}
if (len32 < 128) {
// Length should have used short-form encoding.
OPENSSL_PUT_ERROR(ASN1, ASN1_R_DECODE_ERROR);
return 0;
}
if ((len32 >> ((num_bytes-1)*8)) == 0) {
// Length should have been at least one byte shorter.
OPENSSL_PUT_ERROR(ASN1, ASN1_R_DECODE_ERROR);
return 0;
}
len = len32;
}
if (len + header_len < len ||
len + header_len > max_len ||
len > INT_MAX) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_TOO_LONG);
return 0;
}
len += header_len;
*out_len = len;
*out = OPENSSL_malloc(len);
if (*out == NULL) {
OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE);
return 0;
}
OPENSSL_memcpy(*out, header, header_len);
if (!bio_read_full(bio, (*out) + header_len, NULL, len - header_len)) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_NOT_ENOUGH_DATA);
OPENSSL_free(*out);
return 0;
}
return 1;
}
void BIO_set_retry_special(BIO *bio) {
bio->flags |= BIO_FLAGS_READ | BIO_FLAGS_IO_SPECIAL;
}
int BIO_set_write_buffer_size(BIO *bio, int buffer_size) { return 0; }
static struct CRYPTO_STATIC_MUTEX g_index_lock = CRYPTO_STATIC_MUTEX_INIT;
static int g_index = BIO_TYPE_START;
int BIO_get_new_index(void) {
CRYPTO_STATIC_MUTEX_lock_write(&g_index_lock);
// If |g_index| exceeds 255, it will collide with the flags bits.
int ret = g_index > 255 ? -1 : g_index++;
CRYPTO_STATIC_MUTEX_unlock_write(&g_index_lock);
return ret;
}
BIO_METHOD *BIO_meth_new(int type, const char *name) {
BIO_METHOD *method = OPENSSL_malloc(sizeof(BIO_METHOD));
if (method == NULL) {
return NULL;
}
OPENSSL_memset(method, 0, sizeof(BIO_METHOD));
method->type = type;
method->name = name;
return method;
}
void BIO_meth_free(BIO_METHOD *method) {
OPENSSL_free(method);
}
int BIO_meth_set_create(BIO_METHOD *method,
int (*create)(BIO *)) {
method->create = create;
return 1;
}
int BIO_meth_set_destroy(BIO_METHOD *method,
int (*destroy)(BIO *)) {
method->destroy = destroy;
return 1;
}
int BIO_meth_set_write(BIO_METHOD *method,
int (*write)(BIO *, const char *, int)) {
method->bwrite = write;
return 1;
}
int BIO_meth_set_read(BIO_METHOD *method,
int (*read)(BIO *, char *, int)) {
method->bread = read;
return 1;
}
int BIO_meth_set_gets(BIO_METHOD *method,
int (*gets)(BIO *, char *, int)) {
method->bgets = gets;
return 1;
}
int BIO_meth_set_ctrl(BIO_METHOD *method,
long (*ctrl)(BIO *, int, long, void *)) {
method->ctrl = ctrl;
return 1;
}
void BIO_set_data(BIO *bio, void *ptr) { bio->ptr = ptr; }
void *BIO_get_data(BIO *bio) { return bio->ptr; }
void BIO_set_init(BIO *bio, int init) { bio->init = init; }
int BIO_get_init(BIO *bio) { return bio->init; }
void BIO_set_shutdown(BIO *bio, int shutdown) { bio->shutdown = shutdown; }
int BIO_get_shutdown(BIO *bio) { return bio->shutdown; }
int BIO_meth_set_puts(BIO_METHOD *method, int (*puts)(BIO *, const char *)) {
// Ignore the parameter. We implement |BIO_puts| using |BIO_write|.
return 1;
}


@@ -0,0 +1,317 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#if defined(__linux) || defined(__sun) || defined(__hpux)
// Following definition aliases fopen to fopen64 on above mentioned
// platforms. This makes it possible to open and sequentially access
// files larger than 2GB from 32-bit application. It does not allow to
// traverse them beyond 2GB with fseek/ftell, but on the other hand *no*
// 32-bit platform permits that, not with fseek/ftell. Not to mention
// that breaking 2GB limit for seeking would require surgery to *our*
// API. But sequential access suffices for practical cases when you
// can run into large files, such as fingerprinting, so we can let API
// alone. For reference, the list of 32-bit platforms which allow for
// sequential access of large files without extra "magic" comprise *BSD,
// Darwin, IRIX...
#ifndef _FILE_OFFSET_BITS
#define _FILE_OFFSET_BITS 64
#endif
#endif
#include <CBigNumBoringSSL_bio.h>
#if !defined(OPENSSL_TRUSTY)
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include "../internal.h"
#define BIO_FP_READ 0x02
#define BIO_FP_WRITE 0x04
#define BIO_FP_APPEND 0x08
BIO *BIO_new_file(const char *filename, const char *mode) {
BIO *ret;
FILE *file;
file = fopen(filename, mode);
if (file == NULL) {
OPENSSL_PUT_SYSTEM_ERROR();
ERR_add_error_data(5, "fopen('", filename, "','", mode, "')");
if (errno == ENOENT) {
OPENSSL_PUT_ERROR(BIO, BIO_R_NO_SUCH_FILE);
} else {
OPENSSL_PUT_ERROR(BIO, BIO_R_SYS_LIB);
}
return NULL;
}
ret = BIO_new_fp(file, BIO_CLOSE);
if (ret == NULL) {
fclose(file);
return NULL;
}
return ret;
}
BIO *BIO_new_fp(FILE *stream, int close_flag) {
BIO *ret = BIO_new(BIO_s_file());
if (ret == NULL) {
return NULL;
}
BIO_set_fp(ret, stream, close_flag);
return ret;
}
static int file_new(BIO *bio) { return 1; }
static int file_free(BIO *bio) {
if (bio == NULL) {
return 0;
}
if (!bio->shutdown) {
return 1;
}
if (bio->init && bio->ptr != NULL) {
fclose(bio->ptr);
bio->ptr = NULL;
}
bio->init = 0;
return 1;
}
static int file_read(BIO *b, char *out, int outl) {
if (!b->init) {
return 0;
}
size_t ret = fread(out, 1, outl, (FILE *)b->ptr);
if (ret == 0 && ferror((FILE *)b->ptr)) {
OPENSSL_PUT_SYSTEM_ERROR();
OPENSSL_PUT_ERROR(BIO, ERR_R_SYS_LIB);
return -1;
}
// fread reads at most |outl| bytes, so |ret| fits in an int.
return (int)ret;
}
static int file_write(BIO *b, const char *in, int inl) {
int ret = 0;
if (!b->init) {
return 0;
}
ret = fwrite(in, inl, 1, (FILE *)b->ptr);
if (ret > 0) {
ret = inl;
}
return ret;
}
static long file_ctrl(BIO *b, int cmd, long num, void *ptr) {
long ret = 1;
FILE *fp = (FILE *)b->ptr;
FILE **fpp;
char p[4];
switch (cmd) {
case BIO_CTRL_RESET:
num = 0;
OPENSSL_FALLTHROUGH;
case BIO_C_FILE_SEEK:
ret = (long)fseek(fp, num, 0);
break;
case BIO_CTRL_EOF:
ret = (long)feof(fp);
break;
case BIO_C_FILE_TELL:
case BIO_CTRL_INFO:
ret = ftell(fp);
break;
case BIO_C_SET_FILE_PTR:
file_free(b);
b->shutdown = (int)num & BIO_CLOSE;
b->ptr = ptr;
b->init = 1;
break;
case BIO_C_SET_FILENAME:
file_free(b);
b->shutdown = (int)num & BIO_CLOSE;
if (num & BIO_FP_APPEND) {
if (num & BIO_FP_READ) {
OPENSSL_strlcpy(p, "a+", sizeof(p));
} else {
OPENSSL_strlcpy(p, "a", sizeof(p));
}
} else if ((num & BIO_FP_READ) && (num & BIO_FP_WRITE)) {
OPENSSL_strlcpy(p, "r+", sizeof(p));
} else if (num & BIO_FP_WRITE) {
OPENSSL_strlcpy(p, "w", sizeof(p));
} else if (num & BIO_FP_READ) {
OPENSSL_strlcpy(p, "r", sizeof(p));
} else {
OPENSSL_PUT_ERROR(BIO, BIO_R_BAD_FOPEN_MODE);
ret = 0;
break;
}
fp = fopen(ptr, p);
if (fp == NULL) {
OPENSSL_PUT_SYSTEM_ERROR();
ERR_add_error_data(5, "fopen('", ptr, "','", p, "')");
OPENSSL_PUT_ERROR(BIO, ERR_R_SYS_LIB);
ret = 0;
break;
}
b->ptr = fp;
b->init = 1;
break;
case BIO_C_GET_FILE_PTR:
// the ptr parameter is actually a FILE ** in this case.
if (ptr != NULL) {
fpp = (FILE **)ptr;
*fpp = (FILE *)b->ptr;
}
break;
case BIO_CTRL_GET_CLOSE:
ret = (long)b->shutdown;
break;
case BIO_CTRL_SET_CLOSE:
b->shutdown = (int)num;
break;
case BIO_CTRL_FLUSH:
ret = 0 == fflush((FILE *)b->ptr);
break;
case BIO_CTRL_WPENDING:
case BIO_CTRL_PENDING:
default:
ret = 0;
break;
}
return ret;
}
static int file_gets(BIO *bp, char *buf, int size) {
int ret = 0;
if (size == 0) {
return 0;
}
if (!fgets(buf, size, (FILE *)bp->ptr)) {
buf[0] = 0;
goto err;
}
ret = strlen(buf);
err:
return ret;
}
static const BIO_METHOD methods_filep = {
BIO_TYPE_FILE, "FILE pointer",
file_write, file_read,
NULL /* puts */, file_gets,
file_ctrl, file_new,
file_free, NULL /* callback_ctrl */,
};
const BIO_METHOD *BIO_s_file(void) { return &methods_filep; }
int BIO_get_fp(BIO *bio, FILE **out_file) {
return BIO_ctrl(bio, BIO_C_GET_FILE_PTR, 0, (char*) out_file);
}
int BIO_set_fp(BIO *bio, FILE *file, int close_flag) {
return BIO_ctrl(bio, BIO_C_SET_FILE_PTR, close_flag, (char *) file);
}
int BIO_read_filename(BIO *bio, const char *filename) {
return BIO_ctrl(bio, BIO_C_SET_FILENAME, BIO_CLOSE | BIO_FP_READ,
(char *)filename);
}
int BIO_write_filename(BIO *bio, const char *filename) {
return BIO_ctrl(bio, BIO_C_SET_FILENAME, BIO_CLOSE | BIO_FP_WRITE,
(char *)filename);
}
int BIO_append_filename(BIO *bio, const char *filename) {
return BIO_ctrl(bio, BIO_C_SET_FILENAME, BIO_CLOSE | BIO_FP_APPEND,
(char *)filename);
}
int BIO_rw_filename(BIO *bio, const char *filename) {
return BIO_ctrl(bio, BIO_C_SET_FILENAME,
BIO_CLOSE | BIO_FP_READ | BIO_FP_WRITE, (char *)filename);
}
#endif // OPENSSL_TRUSTY


@@ -0,0 +1,470 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <CBigNumBoringSSL_bio.h>
#include <CBigNumBoringSSL_bytestring.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include "../fipsmodule/bn/internal.h"
int BN_bn2cbb_padded(CBB *out, size_t len, const BIGNUM *in) {
uint8_t *ptr;
return CBB_add_space(out, &ptr, len) && BN_bn2bin_padded(ptr, len, in);
}
static const char hextable[] = "0123456789abcdef";
char *BN_bn2hex(const BIGNUM *bn) {
int width = bn_minimal_width(bn);
char *buf = OPENSSL_malloc(1 /* leading '-' */ + 1 /* zero is non-empty */ +
width * BN_BYTES * 2 + 1 /* trailing NUL */);
if (buf == NULL) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
return NULL;
}
char *p = buf;
if (bn->neg) {
*(p++) = '-';
}
if (BN_is_zero(bn)) {
*(p++) = '0';
}
int z = 0;
for (int i = width - 1; i >= 0; i--) {
for (int j = BN_BITS2 - 8; j >= 0; j -= 8) {
// strip leading zeros
int v = ((int)(bn->d[i] >> (long)j)) & 0xff;
if (z || v != 0) {
*(p++) = hextable[v >> 4];
*(p++) = hextable[v & 0x0f];
z = 1;
}
}
}
*p = '\0';
return buf;
}
// decode_hex decodes |in_len| bytes of hex data from |in| and updates |bn|.
static int decode_hex(BIGNUM *bn, const char *in, int in_len) {
if (in_len > INT_MAX/4) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
return 0;
}
// |in_len| is the number of hex digits.
if (!bn_expand(bn, in_len * 4)) {
return 0;
}
int i = 0;
while (in_len > 0) {
// Decode one |BN_ULONG| at a time.
int todo = BN_BYTES * 2;
if (todo > in_len) {
todo = in_len;
}
BN_ULONG word = 0;
int j;
for (j = todo; j > 0; j--) {
char c = in[in_len - j];
BN_ULONG hex;
if (c >= '0' && c <= '9') {
hex = c - '0';
} else if (c >= 'a' && c <= 'f') {
hex = c - 'a' + 10;
} else if (c >= 'A' && c <= 'F') {
hex = c - 'A' + 10;
} else {
hex = 0;
// This shouldn't happen. The caller checks |isxdigit|.
assert(0);
}
word = (word << 4) | hex;
}
bn->d[i++] = word;
in_len -= todo;
}
assert(i <= bn->dmax);
bn->width = i;
return 1;
}
// decode_dec decodes |in_len| bytes of decimal data from |in| and updates |bn|.
static int decode_dec(BIGNUM *bn, const char *in, int in_len) {
int i, j;
BN_ULONG l = 0;
// Decode |BN_DEC_NUM| digits at a time.
j = BN_DEC_NUM - (in_len % BN_DEC_NUM);
if (j == BN_DEC_NUM) {
j = 0;
}
l = 0;
for (i = 0; i < in_len; i++) {
l *= 10;
l += in[i] - '0';
if (++j == BN_DEC_NUM) {
if (!BN_mul_word(bn, BN_DEC_CONV) ||
!BN_add_word(bn, l)) {
return 0;
}
l = 0;
j = 0;
}
}
return 1;
}
typedef int (*decode_func) (BIGNUM *bn, const char *in, int in_len);
typedef int (*char_test_func) (int c);
static int bn_x2bn(BIGNUM **outp, const char *in, decode_func decode, char_test_func want_char) {
BIGNUM *ret = NULL;
int neg = 0, i;
int num;
if (in == NULL || *in == 0) {
return 0;
}
if (*in == '-') {
neg = 1;
in++;
}
for (i = 0; want_char((unsigned char)in[i]) && i + neg < INT_MAX; i++) {}
num = i + neg;
if (outp == NULL) {
return num;
}
// in is the start of the hex digits, and it is 'i' long
if (*outp == NULL) {
ret = BN_new();
if (ret == NULL) {
return 0;
}
} else {
ret = *outp;
BN_zero(ret);
}
if (!decode(ret, in, i)) {
goto err;
}
bn_set_minimal_width(ret);
if (!BN_is_zero(ret)) {
ret->neg = neg;
}
*outp = ret;
return num;
err:
if (*outp == NULL) {
BN_free(ret);
}
return 0;
}
int BN_hex2bn(BIGNUM **outp, const char *in) {
return bn_x2bn(outp, in, decode_hex, isxdigit);
}
char *BN_bn2dec(const BIGNUM *a) {
// It is easier to print strings little-endian, so we assemble it in reverse
// and fix at the end.
BIGNUM *copy = NULL;
CBB cbb;
if (!CBB_init(&cbb, 16) ||
!CBB_add_u8(&cbb, 0 /* trailing NUL */)) {
goto cbb_err;
}
if (BN_is_zero(a)) {
if (!CBB_add_u8(&cbb, '0')) {
goto cbb_err;
}
} else {
copy = BN_dup(a);
if (copy == NULL) {
goto err;
}
while (!BN_is_zero(copy)) {
BN_ULONG word = BN_div_word(copy, BN_DEC_CONV);
if (word == (BN_ULONG)-1) {
goto err;
}
const int add_leading_zeros = !BN_is_zero(copy);
for (int i = 0; i < BN_DEC_NUM && (add_leading_zeros || word != 0); i++) {
if (!CBB_add_u8(&cbb, '0' + word % 10)) {
goto cbb_err;
}
word /= 10;
}
assert(word == 0);
}
}
if (BN_is_negative(a) &&
!CBB_add_u8(&cbb, '-')) {
goto cbb_err;
}
uint8_t *data;
size_t len;
if (!CBB_finish(&cbb, &data, &len)) {
goto cbb_err;
}
// Reverse the buffer.
for (size_t i = 0; i < len/2; i++) {
uint8_t tmp = data[i];
data[i] = data[len - 1 - i];
data[len - 1 - i] = tmp;
}
BN_free(copy);
return (char *)data;
cbb_err:
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
err:
BN_free(copy);
CBB_cleanup(&cbb);
return NULL;
}
int BN_dec2bn(BIGNUM **outp, const char *in) {
return bn_x2bn(outp, in, decode_dec, isdigit);
}
int BN_asc2bn(BIGNUM **outp, const char *in) {
const char *const orig_in = in;
if (*in == '-') {
in++;
}
if (in[0] == '0' && (in[1] == 'X' || in[1] == 'x')) {
if (!BN_hex2bn(outp, in+2)) {
return 0;
}
} else {
if (!BN_dec2bn(outp, in)) {
return 0;
}
}
if (*orig_in == '-' && !BN_is_zero(*outp)) {
(*outp)->neg = 1;
}
return 1;
}
int BN_print(BIO *bp, const BIGNUM *a) {
int i, j, v, z = 0;
int ret = 0;
if (a->neg && BIO_write(bp, "-", 1) != 1) {
goto end;
}
if (BN_is_zero(a) && BIO_write(bp, "0", 1) != 1) {
goto end;
}
for (i = bn_minimal_width(a) - 1; i >= 0; i--) {
for (j = BN_BITS2 - 4; j >= 0; j -= 4) {
// strip leading zeros
v = ((int)(a->d[i] >> (long)j)) & 0x0f;
if (z || v != 0) {
if (BIO_write(bp, &hextable[v], 1) != 1) {
goto end;
}
z = 1;
}
}
}
ret = 1;
end:
return ret;
}
int BN_print_fp(FILE *fp, const BIGNUM *a) {
BIO *b = BIO_new_fp(fp, BIO_NOCLOSE);
if (b == NULL) {
return 0;
}
int ret = BN_print(b, a);
BIO_free(b);
return ret;
}
size_t BN_bn2mpi(const BIGNUM *in, uint8_t *out) {
const size_t bits = BN_num_bits(in);
const size_t bytes = (bits + 7) / 8;
// If the number of bits is a multiple of 8, i.e. if the MSB is set,
// prefix with a zero byte.
int extend = 0;
if (bytes != 0 && (bits & 0x07) == 0) {
extend = 1;
}
const size_t len = bytes + extend;
if (len < bytes ||
4 + len < len ||
(len & 0xffffffff) != len) {
// If we cannot represent the number then we emit zero as the interface
// doesn't allow an error to be signalled.
if (out) {
OPENSSL_memset(out, 0, 4);
}
return 4;
}
if (out == NULL) {
return 4 + len;
}
out[0] = len >> 24;
out[1] = len >> 16;
out[2] = len >> 8;
out[3] = len;
if (extend) {
out[4] = 0;
}
BN_bn2bin(in, out + 4 + extend);
if (in->neg && len > 0) {
out[4] |= 0x80;
}
return len + 4;
}
BIGNUM *BN_mpi2bn(const uint8_t *in, size_t len, BIGNUM *out) {
if (len < 4) {
OPENSSL_PUT_ERROR(BN, BN_R_BAD_ENCODING);
return NULL;
}
const size_t in_len = ((size_t)in[0] << 24) |
((size_t)in[1] << 16) |
((size_t)in[2] << 8) |
((size_t)in[3]);
if (in_len != len - 4) {
OPENSSL_PUT_ERROR(BN, BN_R_BAD_ENCODING);
return NULL;
}
int out_is_alloced = 0;
if (out == NULL) {
out = BN_new();
if (out == NULL) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
return NULL;
}
out_is_alloced = 1;
}
if (in_len == 0) {
BN_zero(out);
return out;
}
in += 4;
if (BN_bin2bn(in, in_len, out) == NULL) {
if (out_is_alloced) {
BN_free(out);
}
return NULL;
}
out->neg = ((*in) & 0x80) != 0;
if (out->neg) {
BN_clear_bit(out, BN_num_bits(out) - 1);
}
return out;
}
int BN_bn2binpad(const BIGNUM *in, uint8_t *out, int len) {
if (len < 0 ||
!BN_bn2bin_padded(out, (size_t)len, in)) {
return -1;
}
return len;
}


@@ -0,0 +1,52 @@
/* Copyright (c) 2016, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_bytestring.h>
#include <assert.h>
#include <limits.h>
#include <string.h>
#include <CBigNumBoringSSL_mem.h>
#include "internal.h"
#include "../internal.h"
int CBB_finish_i2d(CBB *cbb, uint8_t **outp) {
assert(cbb->base->can_resize);
uint8_t *der;
size_t der_len;
if (!CBB_finish(cbb, &der, &der_len)) {
CBB_cleanup(cbb);
return -1;
}
if (der_len > INT_MAX) {
OPENSSL_free(der);
return -1;
}
if (outp != NULL) {
if (*outp == NULL) {
*outp = der;
der = NULL;
} else {
OPENSSL_memcpy(*outp, der, der_len);
*outp += der_len;
}
}
OPENSSL_free(der);
return (int)der_len;
}


@@ -0,0 +1,265 @@
/* Copyright (c) 2014, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_bytestring.h>
#include <assert.h>
#include <string.h>
#include "internal.h"
#include "../internal.h"
// kMaxDepth is a just a sanity limit. The code should be such that the length
// of the input being processes always decreases. None the less, a very large
// input could otherwise cause the stack to overflow.
static const unsigned kMaxDepth = 2048;
// is_string_type returns one if |tag| is a string type and zero otherwise. It
// ignores the constructed bit.
static int is_string_type(unsigned tag) {
switch (tag & ~CBS_ASN1_CONSTRUCTED) {
case CBS_ASN1_BITSTRING:
case CBS_ASN1_OCTETSTRING:
case CBS_ASN1_UTF8STRING:
case CBS_ASN1_NUMERICSTRING:
case CBS_ASN1_PRINTABLESTRING:
case CBS_ASN1_T61STRING:
case CBS_ASN1_VIDEOTEXSTRING:
case CBS_ASN1_IA5STRING:
case CBS_ASN1_GRAPHICSTRING:
case CBS_ASN1_VISIBLESTRING:
case CBS_ASN1_GENERALSTRING:
case CBS_ASN1_UNIVERSALSTRING:
case CBS_ASN1_BMPSTRING:
return 1;
default:
return 0;
}
}
// cbs_find_ber walks an ASN.1 structure in |orig_in| and sets |*ber_found|
// depending on whether an indefinite length element or constructed string was
// found. The value of |orig_in| is not changed. It returns one on success (i.e.
// |*ber_found| was set) and zero on error.
static int cbs_find_ber(const CBS *orig_in, char *ber_found, unsigned depth) {
CBS in;
if (depth > kMaxDepth) {
return 0;
}
CBS_init(&in, CBS_data(orig_in), CBS_len(orig_in));
*ber_found = 0;
while (CBS_len(&in) > 0) {
CBS contents;
unsigned tag;
size_t header_len;
if (!CBS_get_any_ber_asn1_element(&in, &contents, &tag, &header_len)) {
return 0;
}
if (CBS_len(&contents) == header_len &&
header_len > 0 &&
CBS_data(&contents)[header_len-1] == 0x80) {
// Found an indefinite-length element.
*ber_found = 1;
return 1;
}
if (tag & CBS_ASN1_CONSTRUCTED) {
if (is_string_type(tag)) {
// Constructed strings are only legal in BER and require conversion.
*ber_found = 1;
return 1;
}
if (!CBS_skip(&contents, header_len) ||
!cbs_find_ber(&contents, ber_found, depth + 1)) {
return 0;
}
}
}
return 1;
}
// is_eoc returns true if |header_len| and |contents|, as returned by
// |CBS_get_any_ber_asn1_element|, indicate an "end of contents" (EOC) value.
static char is_eoc(size_t header_len, CBS *contents) {
return header_len == 2 && CBS_len(contents) == 2 &&
OPENSSL_memcmp(CBS_data(contents), "\x00\x00", 2) == 0;
}
// cbs_convert_ber reads BER data from |in| and writes DER data to |out|. If
// |string_tag| is non-zero, then all elements must match |string_tag| up to the
// constructed bit and primitive element bodies are written to |out| without
// element headers. This is used when concatenating the fragments of a
// constructed string. If |looking_for_eoc| is set then any EOC elements found
// will cause the function to return after consuming it. It returns one on
// success and zero on error.
static int cbs_convert_ber(CBS *in, CBB *out, unsigned string_tag,
char looking_for_eoc, unsigned depth) {
assert(!(string_tag & CBS_ASN1_CONSTRUCTED));
if (depth > kMaxDepth) {
return 0;
}
while (CBS_len(in) > 0) {
CBS contents;
unsigned tag, child_string_tag = string_tag;
size_t header_len;
CBB *out_contents, out_contents_storage;
if (!CBS_get_any_ber_asn1_element(in, &contents, &tag, &header_len)) {
return 0;
}
if (is_eoc(header_len, &contents)) {
return looking_for_eoc;
}
if (string_tag != 0) {
// This is part of a constructed string. All elements must match
// |string_tag| up to the constructed bit and get appended to |out|
// without a child element.
if ((tag & ~CBS_ASN1_CONSTRUCTED) != string_tag) {
return 0;
}
out_contents = out;
} else {
unsigned out_tag = tag;
if ((tag & CBS_ASN1_CONSTRUCTED) && is_string_type(tag)) {
// If a constructed string, clear the constructed bit and inform
// children to concatenate bodies.
out_tag &= ~CBS_ASN1_CONSTRUCTED;
child_string_tag = out_tag;
}
if (!CBB_add_asn1(out, &out_contents_storage, out_tag)) {
return 0;
}
out_contents = &out_contents_storage;
}
if (CBS_len(&contents) == header_len && header_len > 0 &&
CBS_data(&contents)[header_len - 1] == 0x80) {
// This is an indefinite length element.
if (!cbs_convert_ber(in, out_contents, child_string_tag,
1 /* looking for eoc */, depth + 1) ||
!CBB_flush(out)) {
return 0;
}
continue;
}
if (!CBS_skip(&contents, header_len)) {
return 0;
}
if (tag & CBS_ASN1_CONSTRUCTED) {
// Recurse into children.
if (!cbs_convert_ber(&contents, out_contents, child_string_tag,
0 /* not looking for eoc */, depth + 1)) {
return 0;
}
} else {
// Copy primitive contents as-is.
if (!CBB_add_bytes(out_contents, CBS_data(&contents),
CBS_len(&contents))) {
return 0;
}
}
if (!CBB_flush(out)) {
return 0;
}
}
return looking_for_eoc == 0;
}
int CBS_asn1_ber_to_der(CBS *in, CBS *out, uint8_t **out_storage) {
CBB cbb;
// First, do a quick walk to find any indefinite-length elements. Most of the
// time we hope that there aren't any and thus we can quickly return.
char conversion_needed;
if (!cbs_find_ber(in, &conversion_needed, 0)) {
return 0;
}
if (!conversion_needed) {
if (!CBS_get_any_asn1_element(in, out, NULL, NULL)) {
return 0;
}
*out_storage = NULL;
return 1;
}
size_t len;
if (!CBB_init(&cbb, CBS_len(in)) ||
!cbs_convert_ber(in, &cbb, 0, 0, 0) ||
!CBB_finish(&cbb, out_storage, &len)) {
CBB_cleanup(&cbb);
return 0;
}
CBS_init(out, *out_storage, len);
return 1;
}
int CBS_get_asn1_implicit_string(CBS *in, CBS *out, uint8_t **out_storage,
unsigned outer_tag, unsigned inner_tag) {
assert(!(outer_tag & CBS_ASN1_CONSTRUCTED));
assert(!(inner_tag & CBS_ASN1_CONSTRUCTED));
assert(is_string_type(inner_tag));
if (CBS_peek_asn1_tag(in, outer_tag)) {
// Normal implicitly-tagged string.
*out_storage = NULL;
return CBS_get_asn1(in, out, outer_tag);
}
// Otherwise, try to parse an implicitly-tagged constructed string.
// |CBS_asn1_ber_to_der| is assumed to have run, so only allow one level deep
// of nesting.
CBB result;
CBS child;
if (!CBB_init(&result, CBS_len(in)) ||
!CBS_get_asn1(in, &child, outer_tag | CBS_ASN1_CONSTRUCTED)) {
goto err;
}
while (CBS_len(&child) > 0) {
CBS chunk;
if (!CBS_get_asn1(&child, &chunk, inner_tag) ||
!CBB_add_bytes(&result, CBS_data(&chunk), CBS_len(&chunk))) {
goto err;
}
}
uint8_t *data;
size_t len;
if (!CBB_finish(&result, &data, &len)) {
goto err;
}
CBS_init(out, data, len);
*out_storage = data;
return 1;
err:
CBB_cleanup(&result);
return 0;
}
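// Illustrative usage sketch (not part of the upstream BoringSSL sources),
// assuming the extra <CBigNumBoringSSL_mem.h> include below for
// |OPENSSL_free|: an indefinite-length, constructed OCTET STRING is
// normalized to DER by |CBS_asn1_ber_to_der| and then parsed as usual.
#include <CBigNumBoringSSL_mem.h>

static int example_ber_octet_string_to_der(void) {
  // BER: constructed OCTET STRING (0x24), indefinite length (0x80), two
  // primitive fragments "he" and "llo", terminated by an EOC (00 00).
  static const uint8_t kBer[] = {0x24, 0x80, 0x04, 0x02, 'h', 'e',
                                 0x04, 0x03, 'l', 'l', 'o', 0x00, 0x00};
  CBS in, der, contents;
  uint8_t *storage = NULL;
  CBS_init(&in, kBer, sizeof(kBer));
  if (!CBS_asn1_ber_to_der(&in, &der, &storage)) {
    return 0;
  }
  // After conversion the element reads as plain DER: 04 05 "hello".
  int ok = CBS_get_asn1(&der, &contents, CBS_ASN1_OCTETSTRING) &&
           CBS_len(&contents) == 5 &&
           OPENSSL_memcmp(CBS_data(&contents), "hello", 5) == 0;
  OPENSSL_free(storage);  // non-NULL only because a conversion was needed
  return ok;
}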

View File

@@ -0,0 +1,719 @@
/* Copyright (c) 2014, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_bytestring.h>
#include <assert.h>
#include <limits.h>
#include <string.h>
#include <CBigNumBoringSSL_mem.h>
#include "../internal.h"
void CBB_zero(CBB *cbb) {
OPENSSL_memset(cbb, 0, sizeof(CBB));
}
static int cbb_init(CBB *cbb, uint8_t *buf, size_t cap) {
// This assumes that |cbb| has already been zeroed.
struct cbb_buffer_st *base;
base = OPENSSL_malloc(sizeof(struct cbb_buffer_st));
if (base == NULL) {
return 0;
}
base->buf = buf;
base->len = 0;
base->cap = cap;
base->can_resize = 1;
base->error = 0;
cbb->base = base;
cbb->is_child = 0;
return 1;
}
int CBB_init(CBB *cbb, size_t initial_capacity) {
CBB_zero(cbb);
uint8_t *buf = OPENSSL_malloc(initial_capacity);
if (initial_capacity > 0 && buf == NULL) {
return 0;
}
if (!cbb_init(cbb, buf, initial_capacity)) {
OPENSSL_free(buf);
return 0;
}
return 1;
}
int CBB_init_fixed(CBB *cbb, uint8_t *buf, size_t len) {
CBB_zero(cbb);
if (!cbb_init(cbb, buf, len)) {
return 0;
}
cbb->base->can_resize = 0;
return 1;
}
void CBB_cleanup(CBB *cbb) {
// Child |CBB|s are non-owning. They are implicitly discarded and should not
// be used with |CBB_cleanup| or |ScopedCBB|.
assert(!cbb->is_child);
if (cbb->is_child) {
return;
}
if (cbb->base) {
if (cbb->base->can_resize) {
OPENSSL_free(cbb->base->buf);
}
OPENSSL_free(cbb->base);
}
cbb->base = NULL;
}
static int cbb_buffer_reserve(struct cbb_buffer_st *base, uint8_t **out,
size_t len) {
size_t newlen;
if (base == NULL) {
return 0;
}
newlen = base->len + len;
if (newlen < base->len) {
// Overflow
goto err;
}
if (newlen > base->cap) {
size_t newcap = base->cap * 2;
uint8_t *newbuf;
if (!base->can_resize) {
goto err;
}
if (newcap < base->cap || newcap < newlen) {
newcap = newlen;
}
newbuf = OPENSSL_realloc(base->buf, newcap);
if (newbuf == NULL) {
goto err;
}
base->buf = newbuf;
base->cap = newcap;
}
if (out) {
*out = base->buf + base->len;
}
return 1;
err:
base->error = 1;
return 0;
}
static int cbb_buffer_add(struct cbb_buffer_st *base, uint8_t **out,
size_t len) {
if (!cbb_buffer_reserve(base, out, len)) {
return 0;
}
// This will not overflow or |cbb_buffer_reserve| would have failed.
base->len += len;
return 1;
}
static int cbb_buffer_add_u(struct cbb_buffer_st *base, uint64_t v,
size_t len_len) {
if (len_len == 0) {
return 1;
}
uint8_t *buf;
if (!cbb_buffer_add(base, &buf, len_len)) {
return 0;
}
for (size_t i = len_len - 1; i < len_len; i--) {
buf[i] = v;
v >>= 8;
}
if (v != 0) {
base->error = 1;
return 0;
}
return 1;
}
int CBB_finish(CBB *cbb, uint8_t **out_data, size_t *out_len) {
if (cbb->is_child) {
return 0;
}
if (!CBB_flush(cbb)) {
return 0;
}
if (cbb->base->can_resize && (out_data == NULL || out_len == NULL)) {
// |out_data| and |out_len| can only be NULL if the CBB is fixed.
return 0;
}
if (out_data != NULL) {
*out_data = cbb->base->buf;
}
if (out_len != NULL) {
*out_len = cbb->base->len;
}
cbb->base->buf = NULL;
CBB_cleanup(cbb);
return 1;
}
// CBB_flush recurses and then writes out any pending length prefix. The
// current length of the underlying base is taken to be the length of the
// length-prefixed data.
int CBB_flush(CBB *cbb) {
size_t child_start, i, len;
// If |cbb->base| has hit an error, the buffer is in an undefined state, so
// fail all following calls. In particular, |cbb->child| may point to invalid
// memory.
if (cbb->base == NULL || cbb->base->error) {
return 0;
}
if (cbb->child == NULL || cbb->child->pending_len_len == 0) {
return 1;
}
child_start = cbb->child->offset + cbb->child->pending_len_len;
if (!CBB_flush(cbb->child) ||
child_start < cbb->child->offset ||
cbb->base->len < child_start) {
goto err;
}
len = cbb->base->len - child_start;
if (cbb->child->pending_is_asn1) {
// For ASN.1 we assume that we'll only need a single byte for the length.
// If that turned out to be incorrect, we have to move the contents along
// in order to make space.
uint8_t len_len;
uint8_t initial_length_byte;
assert (cbb->child->pending_len_len == 1);
if (len > 0xfffffffe) {
// Too large.
goto err;
} else if (len > 0xffffff) {
len_len = 5;
initial_length_byte = 0x80 | 4;
} else if (len > 0xffff) {
len_len = 4;
initial_length_byte = 0x80 | 3;
} else if (len > 0xff) {
len_len = 3;
initial_length_byte = 0x80 | 2;
} else if (len > 0x7f) {
len_len = 2;
initial_length_byte = 0x80 | 1;
} else {
len_len = 1;
initial_length_byte = (uint8_t)len;
len = 0;
}
if (len_len != 1) {
// We need to move the contents along in order to make space.
size_t extra_bytes = len_len - 1;
if (!cbb_buffer_add(cbb->base, NULL, extra_bytes)) {
goto err;
}
OPENSSL_memmove(cbb->base->buf + child_start + extra_bytes,
cbb->base->buf + child_start, len);
}
cbb->base->buf[cbb->child->offset++] = initial_length_byte;
cbb->child->pending_len_len = len_len - 1;
}
for (i = cbb->child->pending_len_len - 1; i < cbb->child->pending_len_len;
i--) {
cbb->base->buf[cbb->child->offset + i] = (uint8_t)len;
len >>= 8;
}
if (len != 0) {
goto err;
}
cbb->child->base = NULL;
cbb->child = NULL;
return 1;
err:
cbb->base->error = 1;
return 0;
}
const uint8_t *CBB_data(const CBB *cbb) {
assert(cbb->child == NULL);
return cbb->base->buf + cbb->offset + cbb->pending_len_len;
}
size_t CBB_len(const CBB *cbb) {
assert(cbb->child == NULL);
assert(cbb->offset + cbb->pending_len_len <= cbb->base->len);
return cbb->base->len - cbb->offset - cbb->pending_len_len;
}
static int cbb_add_length_prefixed(CBB *cbb, CBB *out_contents,
uint8_t len_len) {
uint8_t *prefix_bytes;
if (!CBB_flush(cbb)) {
return 0;
}
size_t offset = cbb->base->len;
if (!cbb_buffer_add(cbb->base, &prefix_bytes, len_len)) {
return 0;
}
OPENSSL_memset(prefix_bytes, 0, len_len);
OPENSSL_memset(out_contents, 0, sizeof(CBB));
out_contents->base = cbb->base;
out_contents->is_child = 1;
cbb->child = out_contents;
cbb->child->offset = offset;
cbb->child->pending_len_len = len_len;
cbb->child->pending_is_asn1 = 0;
return 1;
}
int CBB_add_u8_length_prefixed(CBB *cbb, CBB *out_contents) {
return cbb_add_length_prefixed(cbb, out_contents, 1);
}
int CBB_add_u16_length_prefixed(CBB *cbb, CBB *out_contents) {
return cbb_add_length_prefixed(cbb, out_contents, 2);
}
int CBB_add_u24_length_prefixed(CBB *cbb, CBB *out_contents) {
return cbb_add_length_prefixed(cbb, out_contents, 3);
}
// add_base128_integer encodes |v| as a big-endian base-128 integer where the
// high bit of each byte indicates where there is more data. This is the
// encoding used in DER for both high tag number form and OID components.
static int add_base128_integer(CBB *cbb, uint64_t v) {
unsigned len_len = 0;
uint64_t copy = v;
while (copy > 0) {
len_len++;
copy >>= 7;
}
if (len_len == 0) {
len_len = 1; // Zero is encoded with one byte.
}
for (unsigned i = len_len - 1; i < len_len; i--) {
uint8_t byte = (v >> (7 * i)) & 0x7f;
if (i != 0) {
// The high bit denotes whether there is more data.
byte |= 0x80;
}
if (!CBB_add_u8(cbb, byte)) {
return 0;
}
}
return 1;
}
int CBB_add_asn1(CBB *cbb, CBB *out_contents, unsigned tag) {
if (!CBB_flush(cbb)) {
return 0;
}
// Split the tag into leading bits and tag number.
uint8_t tag_bits = (tag >> CBS_ASN1_TAG_SHIFT) & 0xe0;
unsigned tag_number = tag & CBS_ASN1_TAG_NUMBER_MASK;
if (tag_number >= 0x1f) {
// Set all the bits in the tag number to signal high tag number form.
if (!CBB_add_u8(cbb, tag_bits | 0x1f) ||
!add_base128_integer(cbb, tag_number)) {
return 0;
}
} else if (!CBB_add_u8(cbb, tag_bits | tag_number)) {
return 0;
}
size_t offset = cbb->base->len;
if (!CBB_add_u8(cbb, 0)) {
return 0;
}
OPENSSL_memset(out_contents, 0, sizeof(CBB));
out_contents->base = cbb->base;
out_contents->is_child = 1;
cbb->child = out_contents;
cbb->child->offset = offset;
cbb->child->pending_len_len = 1;
cbb->child->pending_is_asn1 = 1;
return 1;
}
int CBB_add_bytes(CBB *cbb, const uint8_t *data, size_t len) {
uint8_t *dest;
if (!CBB_flush(cbb) ||
!cbb_buffer_add(cbb->base, &dest, len)) {
return 0;
}
OPENSSL_memcpy(dest, data, len);
return 1;
}
int CBB_add_space(CBB *cbb, uint8_t **out_data, size_t len) {
if (!CBB_flush(cbb) ||
!cbb_buffer_add(cbb->base, out_data, len)) {
return 0;
}
return 1;
}
int CBB_reserve(CBB *cbb, uint8_t **out_data, size_t len) {
if (!CBB_flush(cbb) ||
!cbb_buffer_reserve(cbb->base, out_data, len)) {
return 0;
}
return 1;
}
int CBB_did_write(CBB *cbb, size_t len) {
size_t newlen = cbb->base->len + len;
if (cbb->child != NULL ||
newlen < cbb->base->len ||
newlen > cbb->base->cap) {
return 0;
}
cbb->base->len = newlen;
return 1;
}
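// Illustrative sketch (not part of the upstream file): |CBB_reserve| and
// |CBB_did_write| let a caller write into the buffer directly and commit
// only the bytes it actually produced, which can be fewer than reserved.
static int example_reserve_then_commit(CBB *cbb) {
  uint8_t *out;
  if (!CBB_reserve(cbb, &out, 4)) {  // reserve an upper bound of four bytes
    return 0;
  }
  size_t used = 0;
  out[used++] = 0xde;
  out[used++] = 0xad;                // only two of the reserved bytes used
  return CBB_did_write(cbb, used);   // commit exactly |used| bytes
}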
int CBB_add_u8(CBB *cbb, uint8_t value) {
if (!CBB_flush(cbb)) {
return 0;
}
return cbb_buffer_add_u(cbb->base, value, 1);
}
int CBB_add_u16(CBB *cbb, uint16_t value) {
if (!CBB_flush(cbb)) {
return 0;
}
return cbb_buffer_add_u(cbb->base, value, 2);
}
int CBB_add_u16le(CBB *cbb, uint16_t value) {
return CBB_add_u16(cbb, CRYPTO_bswap2(value));
}
int CBB_add_u24(CBB *cbb, uint32_t value) {
if (!CBB_flush(cbb)) {
return 0;
}
return cbb_buffer_add_u(cbb->base, value, 3);
}
int CBB_add_u32(CBB *cbb, uint32_t value) {
if (!CBB_flush(cbb)) {
return 0;
}
return cbb_buffer_add_u(cbb->base, value, 4);
}
int CBB_add_u32le(CBB *cbb, uint32_t value) {
return CBB_add_u32(cbb, CRYPTO_bswap4(value));
}
int CBB_add_u64(CBB *cbb, uint64_t value) {
if (!CBB_flush(cbb)) {
return 0;
}
return cbb_buffer_add_u(cbb->base, value, 8);
}
int CBB_add_u64le(CBB *cbb, uint64_t value) {
return CBB_add_u64(cbb, CRYPTO_bswap8(value));
}
void CBB_discard_child(CBB *cbb) {
if (cbb->child == NULL) {
return;
}
cbb->base->len = cbb->child->offset;
cbb->child->base = NULL;
cbb->child = NULL;
}
int CBB_add_asn1_uint64(CBB *cbb, uint64_t value) {
CBB child;
int started = 0;
if (!CBB_add_asn1(cbb, &child, CBS_ASN1_INTEGER)) {
return 0;
}
for (size_t i = 0; i < 8; i++) {
uint8_t byte = (value >> 8*(7-i)) & 0xff;
if (!started) {
if (byte == 0) {
// Don't encode leading zeros.
continue;
}
// If the high bit is set, add a padding byte to make it
// unsigned.
if ((byte & 0x80) && !CBB_add_u8(&child, 0)) {
return 0;
}
started = 1;
}
if (!CBB_add_u8(&child, byte)) {
return 0;
}
}
// 0 is encoded as a single 0, not the empty string.
if (!started && !CBB_add_u8(&child, 0)) {
return 0;
}
return CBB_flush(cbb);
}
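// Illustrative sketch (not part of the upstream file), assuming the tag
// constants from the vendored CBigNumBoringSSL_bytestring.h header: a small
// DER SEQUENCE built with this API. The one-byte length placeholder written
// by |CBB_add_asn1| is fixed up by |CBB_flush| once the contents are known.
static int example_build_sequence(uint8_t **out, size_t *out_len) {
  CBB cbb, seq;
  if (!CBB_init(&cbb, 16) ||
      !CBB_add_asn1(&cbb, &seq, CBS_ASN1_SEQUENCE) ||
      !CBB_add_asn1_uint64(&seq, 1) ||    // 02 01 01
      !CBB_add_asn1_uint64(&seq, 128) ||  // 02 02 00 80 (sign padding byte)
      !CBB_finish(&cbb, out, out_len)) {  // 30 07 02 01 01 02 02 00 80
    CBB_cleanup(&cbb);
    return 0;
  }
  return 1;  // the caller releases |*out| with OPENSSL_free
}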
int CBB_add_asn1_int64(CBB *cbb, int64_t value) {
if (value >= 0) {
return CBB_add_asn1_uint64(cbb, value);
}
union {
int64_t i;
uint8_t bytes[sizeof(int64_t)];
} u;
u.i = value;
int start = 7;
// Skip leading sign-extension bytes unless they are necessary.
while (start > 0 && (u.bytes[start] == 0xff && (u.bytes[start - 1] & 0x80))) {
start--;
}
CBB child;
if (!CBB_add_asn1(cbb, &child, CBS_ASN1_INTEGER)) {
return 0;
}
for (int i = start; i >= 0; i--) {
if (!CBB_add_u8(&child, u.bytes[i])) {
return 0;
}
}
return CBB_flush(cbb);
}
int CBB_add_asn1_octet_string(CBB *cbb, const uint8_t *data, size_t data_len) {
CBB child;
if (!CBB_add_asn1(cbb, &child, CBS_ASN1_OCTETSTRING) ||
!CBB_add_bytes(&child, data, data_len) ||
!CBB_flush(cbb)) {
return 0;
}
return 1;
}
int CBB_add_asn1_bool(CBB *cbb, int value) {
CBB child;
if (!CBB_add_asn1(cbb, &child, CBS_ASN1_BOOLEAN) ||
!CBB_add_u8(&child, value != 0 ? 0xff : 0) ||
!CBB_flush(cbb)) {
return 0;
}
return 1;
}
// parse_dotted_decimal parses one decimal component from |cbs|, where |cbs| is
// an OID literal, e.g., "1.2.840.113554.4.1.72585". It consumes both the
// component and the dot, so |cbs| may be passed into the function again for the
// next value.
static int parse_dotted_decimal(CBS *cbs, uint64_t *out) {
*out = 0;
int seen_digit = 0;
for (;;) {
// Valid terminators for a component are the end of the string or a
// non-terminal dot. If the string ends with a dot, this is not a valid OID
// string.
uint8_t u;
if (!CBS_get_u8(cbs, &u) ||
(u == '.' && CBS_len(cbs) > 0)) {
break;
}
if (u < '0' || u > '9' ||
// Forbid stray leading zeros.
(seen_digit && *out == 0) ||
// Check for overflow.
*out > UINT64_MAX / 10 ||
*out * 10 > UINT64_MAX - (u - '0')) {
return 0;
}
*out = *out * 10 + (u - '0');
seen_digit = 1;
}
// The empty string is not a legal OID component.
return seen_digit;
}
int CBB_add_asn1_oid_from_text(CBB *cbb, const char *text, size_t len) {
if (!CBB_flush(cbb)) {
return 0;
}
CBS cbs;
CBS_init(&cbs, (const uint8_t *)text, len);
// OIDs must have at least two components.
uint64_t a, b;
if (!parse_dotted_decimal(&cbs, &a) ||
!parse_dotted_decimal(&cbs, &b)) {
return 0;
}
// The first component is encoded as 40 * |a| + |b|. This assumes that |a| is
// 0, 1, or 2 and that, when it is 0 or 1, |b| is at most 39.
if (a > 2 ||
(a < 2 && b > 39) ||
b > UINT64_MAX - 80 ||
!add_base128_integer(cbb, 40u * a + b)) {
return 0;
}
// The remaining components are encoded unmodified.
while (CBS_len(&cbs) > 0) {
if (!parse_dotted_decimal(&cbs, &a) ||
!add_base128_integer(cbb, a)) {
return 0;
}
}
return 1;
}
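// Illustrative sketch (not part of the upstream file), assuming
// CBS_ASN1_OBJECT from the vendored bytestring header: the first two OID
// components collapse into a single base-128 value (40 * a + b), so
// "1.2.840.113549" is encoded as 06 06 2a 86 48 86 f7 0d.
static int example_encode_oid(uint8_t **out, size_t *out_len) {
  static const char kOid[] = "1.2.840.113549";
  CBB cbb, obj;
  if (!CBB_init(&cbb, 16) ||
      !CBB_add_asn1(&cbb, &obj, CBS_ASN1_OBJECT) ||
      !CBB_add_asn1_oid_from_text(&obj, kOid, strlen(kOid)) ||
      !CBB_finish(&cbb, out, out_len)) {
    CBB_cleanup(&cbb);
    return 0;
  }
  return 1;
}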
static int compare_set_of_element(const void *a_ptr, const void *b_ptr) {
// See X.690, section 11.6 for the ordering. They are sorted in ascending
// order by their DER encoding.
const CBS *a = a_ptr, *b = b_ptr;
size_t a_len = CBS_len(a), b_len = CBS_len(b);
size_t min_len = a_len < b_len ? a_len : b_len;
int ret = OPENSSL_memcmp(CBS_data(a), CBS_data(b), min_len);
if (ret != 0) {
return ret;
}
if (a_len == b_len) {
return 0;
}
// If one is a prefix of the other, the shorter one sorts first. (This is not
// actually reachable. No DER encoding is a prefix of another DER encoding.)
return a_len < b_len ? -1 : 1;
}
int CBB_flush_asn1_set_of(CBB *cbb) {
if (!CBB_flush(cbb)) {
return 0;
}
CBS cbs;
size_t num_children = 0;
CBS_init(&cbs, CBB_data(cbb), CBB_len(cbb));
while (CBS_len(&cbs) != 0) {
if (!CBS_get_any_asn1_element(&cbs, NULL, NULL, NULL)) {
return 0;
}
num_children++;
}
if (num_children < 2) {
return 1; // Nothing to do. This is the common case for X.509.
}
if (num_children > ((size_t)-1) / sizeof(CBS)) {
return 0; // Overflow.
}
// Parse out the children and sort. We alias them into a copy of the buffer
// so they remain valid as we rewrite |cbb|.
int ret = 0;
size_t buf_len = CBB_len(cbb);
uint8_t *buf = OPENSSL_memdup(CBB_data(cbb), buf_len);
CBS *children = OPENSSL_malloc(num_children * sizeof(CBS));
if (buf == NULL || children == NULL) {
goto err;
}
CBS_init(&cbs, buf, buf_len);
for (size_t i = 0; i < num_children; i++) {
if (!CBS_get_any_asn1_element(&cbs, &children[i], NULL, NULL)) {
goto err;
}
}
qsort(children, num_children, sizeof(CBS), compare_set_of_element);
// Rewind |cbb| and write the contents back in the new order.
cbb->base->len = cbb->offset + cbb->pending_len_len;
for (size_t i = 0; i < num_children; i++) {
if (!CBB_add_bytes(cbb, CBS_data(&children[i]), CBS_len(&children[i]))) {
goto err;
}
}
assert(CBB_len(cbb) == buf_len);
ret = 1;
err:
OPENSSL_free(buf);
OPENSSL_free(children);
return ret;
}
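// Illustrative sketch (not part of the upstream file), assuming CBS_ASN1_SET
// from the vendored bytestring header: DER requires SET OF elements to be
// sorted by their encodings, so the children may be appended in any order
// and reordered here before the enclosing SET is flushed.
static int example_build_set_of(uint8_t **out, size_t *out_len) {
  CBB cbb, set;
  if (!CBB_init(&cbb, 16) ||
      !CBB_add_asn1(&cbb, &set, CBS_ASN1_SET) ||
      !CBB_add_asn1_uint64(&set, 2) ||    // appended first...
      !CBB_add_asn1_uint64(&set, 1) ||    // ...but must sort after this one
      !CBB_flush_asn1_set_of(&set) ||     // yields 31 06 02 01 01 02 01 02
      !CBB_finish(&cbb, out, out_len)) {
    CBB_cleanup(&cbb);
    return 0;
  }
  return 1;
}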

View File

@@ -0,0 +1,688 @@
/* Copyright (c) 2014, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_bytestring.h>
#include <assert.h>
#include <inttypes.h>
#include <string.h>
#include "internal.h"
#include "../internal.h"
void CBS_init(CBS *cbs, const uint8_t *data, size_t len) {
cbs->data = data;
cbs->len = len;
}
static int cbs_get(CBS *cbs, const uint8_t **p, size_t n) {
if (cbs->len < n) {
return 0;
}
*p = cbs->data;
cbs->data += n;
cbs->len -= n;
return 1;
}
int CBS_skip(CBS *cbs, size_t len) {
const uint8_t *dummy;
return cbs_get(cbs, &dummy, len);
}
const uint8_t *CBS_data(const CBS *cbs) {
return cbs->data;
}
size_t CBS_len(const CBS *cbs) {
return cbs->len;
}
int CBS_stow(const CBS *cbs, uint8_t **out_ptr, size_t *out_len) {
OPENSSL_free(*out_ptr);
*out_ptr = NULL;
*out_len = 0;
if (cbs->len == 0) {
return 1;
}
*out_ptr = OPENSSL_memdup(cbs->data, cbs->len);
if (*out_ptr == NULL) {
return 0;
}
*out_len = cbs->len;
return 1;
}
int CBS_strdup(const CBS *cbs, char **out_ptr) {
if (*out_ptr != NULL) {
OPENSSL_free(*out_ptr);
}
*out_ptr = OPENSSL_strndup((const char*)cbs->data, cbs->len);
return (*out_ptr != NULL);
}
int CBS_contains_zero_byte(const CBS *cbs) {
return OPENSSL_memchr(cbs->data, 0, cbs->len) != NULL;
}
int CBS_mem_equal(const CBS *cbs, const uint8_t *data, size_t len) {
if (len != cbs->len) {
return 0;
}
return CRYPTO_memcmp(cbs->data, data, len) == 0;
}
static int cbs_get_u(CBS *cbs, uint64_t *out, size_t len) {
uint64_t result = 0;
const uint8_t *data;
if (!cbs_get(cbs, &data, len)) {
return 0;
}
for (size_t i = 0; i < len; i++) {
result <<= 8;
result |= data[i];
}
*out = result;
return 1;
}
int CBS_get_u8(CBS *cbs, uint8_t *out) {
const uint8_t *v;
if (!cbs_get(cbs, &v, 1)) {
return 0;
}
*out = *v;
return 1;
}
int CBS_get_u16(CBS *cbs, uint16_t *out) {
uint64_t v;
if (!cbs_get_u(cbs, &v, 2)) {
return 0;
}
*out = v;
return 1;
}
int CBS_get_u16le(CBS *cbs, uint16_t *out) {
if (!CBS_get_u16(cbs, out)) {
return 0;
}
*out = CRYPTO_bswap2(*out);
return 1;
}
int CBS_get_u24(CBS *cbs, uint32_t *out) {
uint64_t v;
if (!cbs_get_u(cbs, &v, 3)) {
return 0;
}
*out = v;
return 1;
}
int CBS_get_u32(CBS *cbs, uint32_t *out) {
uint64_t v;
if (!cbs_get_u(cbs, &v, 4)) {
return 0;
}
*out = v;
return 1;
}
int CBS_get_u32le(CBS *cbs, uint32_t *out) {
if (!CBS_get_u32(cbs, out)) {
return 0;
}
*out = CRYPTO_bswap4(*out);
return 1;
}
int CBS_get_u64(CBS *cbs, uint64_t *out) {
return cbs_get_u(cbs, out, 8);
}
int CBS_get_u64le(CBS *cbs, uint64_t *out) {
if (!cbs_get_u(cbs, out, 8)) {
return 0;
}
*out = CRYPTO_bswap8(*out);
return 1;
}
int CBS_get_last_u8(CBS *cbs, uint8_t *out) {
if (cbs->len == 0) {
return 0;
}
*out = cbs->data[cbs->len - 1];
cbs->len--;
return 1;
}
int CBS_get_bytes(CBS *cbs, CBS *out, size_t len) {
const uint8_t *v;
if (!cbs_get(cbs, &v, len)) {
return 0;
}
CBS_init(out, v, len);
return 1;
}
int CBS_copy_bytes(CBS *cbs, uint8_t *out, size_t len) {
const uint8_t *v;
if (!cbs_get(cbs, &v, len)) {
return 0;
}
OPENSSL_memcpy(out, v, len);
return 1;
}
static int cbs_get_length_prefixed(CBS *cbs, CBS *out, size_t len_len) {
uint64_t len;
if (!cbs_get_u(cbs, &len, len_len)) {
return 0;
}
// If |len_len| <= 3 then we know that |len| will fit into a |size_t|, even on
// 32-bit systems.
assert(len_len <= 3);
return CBS_get_bytes(cbs, out, len);
}
int CBS_get_u8_length_prefixed(CBS *cbs, CBS *out) {
return cbs_get_length_prefixed(cbs, out, 1);
}
int CBS_get_u16_length_prefixed(CBS *cbs, CBS *out) {
return cbs_get_length_prefixed(cbs, out, 2);
}
int CBS_get_u24_length_prefixed(CBS *cbs, CBS *out) {
return cbs_get_length_prefixed(cbs, out, 3);
}
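// Illustrative sketch (not part of the upstream file): the length-prefixed
// getters parse the TLS-style pattern of a big-endian length followed by
// exactly that many payload bytes.
static int example_parse_u16_vector(void) {
  static const uint8_t kMsg[] = {0x00, 0x03, 'a', 'b', 'c'};
  CBS cbs, body;
  CBS_init(&cbs, kMsg, sizeof(kMsg));
  return CBS_get_u16_length_prefixed(&cbs, &body) &&
         CBS_len(&body) == 3 &&   // "abc"
         CBS_len(&cbs) == 0;      // prefix and payload were both consumed
}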
// parse_base128_integer reads a big-endian base-128 integer from |cbs| and sets
// |*out| to the result. This is the encoding used in DER for both high tag
// number form and OID components.
static int parse_base128_integer(CBS *cbs, uint64_t *out) {
uint64_t v = 0;
uint8_t b;
do {
if (!CBS_get_u8(cbs, &b)) {
return 0;
}
if ((v >> (64 - 7)) != 0) {
// The value is too large.
return 0;
}
if (v == 0 && b == 0x80) {
// The value must be minimally encoded.
return 0;
}
v = (v << 7) | (b & 0x7f);
// Values end at an octet with the high bit cleared.
} while (b & 0x80);
*out = v;
return 1;
}
static int parse_asn1_tag(CBS *cbs, unsigned *out) {
uint8_t tag_byte;
if (!CBS_get_u8(cbs, &tag_byte)) {
return 0;
}
// ITU-T X.690 section 8.1.2.3 specifies the format for identifiers with a tag
// number no greater than 30.
//
// If the number portion is 31 (0x1f, the largest value that fits in the
// allotted bits), then the tag is more than one byte long and the
// continuation bytes contain the tag number. This parser only supports tag
// numbers less than 31 (and thus single-byte tags).
unsigned tag = ((unsigned)tag_byte & 0xe0) << CBS_ASN1_TAG_SHIFT;
unsigned tag_number = tag_byte & 0x1f;
if (tag_number == 0x1f) {
uint64_t v;
if (!parse_base128_integer(cbs, &v) ||
// Check the tag number is within our supported bounds.
v > CBS_ASN1_TAG_NUMBER_MASK ||
// Small tag numbers should have used low tag number form.
v < 0x1f) {
return 0;
}
tag_number = (unsigned)v;
}
tag |= tag_number;
*out = tag;
return 1;
}
static int cbs_get_any_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
size_t *out_header_len, int ber_ok) {
CBS header = *cbs;
CBS throwaway;
if (out == NULL) {
out = &throwaway;
}
unsigned tag;
if (!parse_asn1_tag(&header, &tag)) {
return 0;
}
if (out_tag != NULL) {
*out_tag = tag;
}
uint8_t length_byte;
if (!CBS_get_u8(&header, &length_byte)) {
return 0;
}
size_t header_len = CBS_len(cbs) - CBS_len(&header);
size_t len;
// The format for the length encoding is specified in ITU-T X.690 section
// 8.1.3.
if ((length_byte & 0x80) == 0) {
// Short form length.
len = ((size_t) length_byte) + header_len;
if (out_header_len != NULL) {
*out_header_len = header_len;
}
} else {
// The high bit indicates that this is the long form, while the next 7 bits
// encode the number of subsequent octets used to encode the length (ITU-T
// X.690 clause 8.1.3.5.b).
const size_t num_bytes = length_byte & 0x7f;
uint64_t len64;
if (ber_ok && (tag & CBS_ASN1_CONSTRUCTED) != 0 && num_bytes == 0) {
// indefinite length
if (out_header_len != NULL) {
*out_header_len = header_len;
}
return CBS_get_bytes(cbs, out, header_len);
}
// ITU-T X.690 clause 8.1.3.5.c specifies that the value 0xff shall not be
// used as the first byte of the length. If this parser encounters that
// value, num_bytes will be parsed as 127, which will fail the check below.
if (num_bytes == 0 || num_bytes > 4) {
return 0;
}
if (!cbs_get_u(&header, &len64, num_bytes)) {
return 0;
}
// ITU-T X.690 section 10.1 (DER length forms) requires encoding the length
// with the minimum number of octets.
if (len64 < 128) {
// Length should have used short-form encoding.
return 0;
}
if ((len64 >> ((num_bytes-1)*8)) == 0) {
// Length should have been at least one byte shorter.
return 0;
}
len = len64;
if (len + header_len + num_bytes < len) {
// Overflow.
return 0;
}
len += header_len + num_bytes;
if (out_header_len != NULL) {
*out_header_len = header_len + num_bytes;
}
}
return CBS_get_bytes(cbs, out, len);
}
int CBS_get_any_asn1(CBS *cbs, CBS *out, unsigned *out_tag) {
size_t header_len;
if (!CBS_get_any_asn1_element(cbs, out, out_tag, &header_len)) {
return 0;
}
if (!CBS_skip(out, header_len)) {
assert(0);
return 0;
}
return 1;
}
int CBS_get_any_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
size_t *out_header_len) {
return cbs_get_any_asn1_element(cbs, out, out_tag, out_header_len,
0 /* DER only */);
}
int CBS_get_any_ber_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
size_t *out_header_len) {
return cbs_get_any_asn1_element(cbs, out, out_tag, out_header_len,
1 /* BER allowed */);
}
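// Illustrative sketch (not part of the upstream file): a 130-byte OCTET
// STRING cannot use short-form length, so its header is tag 04, then 81 82
// (one length octet holding 130), giving a three-byte header and an element
// that spans the whole buffer.
static int example_long_form_length(void) {
  uint8_t buf[3 + 130] = {0x04, 0x81, 0x82};  // remaining bytes stay zero
  CBS cbs, elem;
  unsigned tag;
  size_t header_len;
  CBS_init(&cbs, buf, sizeof(buf));
  return CBS_get_any_asn1_element(&cbs, &elem, &tag, &header_len) &&
         tag == CBS_ASN1_OCTETSTRING &&
         header_len == 3 &&
         CBS_len(&elem) == sizeof(buf);
}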
static int cbs_get_asn1(CBS *cbs, CBS *out, unsigned tag_value,
int skip_header) {
size_t header_len;
unsigned tag;
CBS throwaway;
if (out == NULL) {
out = &throwaway;
}
if (!CBS_get_any_asn1_element(cbs, out, &tag, &header_len) ||
tag != tag_value) {
return 0;
}
if (skip_header && !CBS_skip(out, header_len)) {
assert(0);
return 0;
}
return 1;
}
int CBS_get_asn1(CBS *cbs, CBS *out, unsigned tag_value) {
return cbs_get_asn1(cbs, out, tag_value, 1 /* skip header */);
}
int CBS_get_asn1_element(CBS *cbs, CBS *out, unsigned tag_value) {
return cbs_get_asn1(cbs, out, tag_value, 0 /* include header */);
}
int CBS_peek_asn1_tag(const CBS *cbs, unsigned tag_value) {
if (CBS_len(cbs) < 1) {
return 0;
}
CBS copy = *cbs;
unsigned actual_tag;
return parse_asn1_tag(&copy, &actual_tag) && tag_value == actual_tag;
}
int CBS_get_asn1_uint64(CBS *cbs, uint64_t *out) {
CBS bytes;
if (!CBS_get_asn1(cbs, &bytes, CBS_ASN1_INTEGER)) {
return 0;
}
*out = 0;
const uint8_t *data = CBS_data(&bytes);
size_t len = CBS_len(&bytes);
if (len == 0) {
// An INTEGER is encoded with at least one octet.
return 0;
}
if ((data[0] & 0x80) != 0) {
// Negative number.
return 0;
}
if (data[0] == 0 && len > 1 && (data[1] & 0x80) == 0) {
// Extra leading zeros.
return 0;
}
for (size_t i = 0; i < len; i++) {
if ((*out >> 56) != 0) {
// Too large to represent as a uint64_t.
return 0;
}
*out <<= 8;
*out |= data[i];
}
return 1;
}
int CBS_get_asn1_int64(CBS *cbs, int64_t *out) {
CBS bytes;
if (!CBS_get_asn1(cbs, &bytes, CBS_ASN1_INTEGER)) {
return 0;
}
const uint8_t *data = CBS_data(&bytes);
const size_t len = CBS_len(&bytes);
if (len == 0 || len > sizeof(int64_t)) {
// An INTEGER is encoded with at least one octet.
return 0;
}
if (len > 1) {
if (data[0] == 0 && (data[1] & 0x80) == 0) {
return 0; // Extra leading zeros.
}
if (data[0] == 0xff && (data[1] & 0x80) != 0) {
return 0; // Extra leading 0xff.
}
}
union {
int64_t i;
uint8_t bytes[sizeof(int64_t)];
} u;
const int is_negative = (data[0] & 0x80);
memset(u.bytes, is_negative ? 0xff : 0, sizeof(u.bytes)); // Sign-extend.
for (size_t i = 0; i < len; i++) {
u.bytes[i] = data[len - i - 1];
}
*out = u.i;
return 1;
}
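// Illustrative sketch (not part of the upstream file): INTEGER contents are
// big-endian two's complement, so 02 01 85 decodes to -123 via the signed
// parser, while 02 02 00 ff needs a leading zero to keep 255 non-negative.
static int example_parse_integers(void) {
  static const uint8_t kNeg[] = {0x02, 0x01, 0x85};           // -123
  static const uint8_t kPadded[] = {0x02, 0x02, 0x00, 0xff};  // 255
  CBS cbs;
  int64_t v;
  uint64_t u;
  CBS_init(&cbs, kNeg, sizeof(kNeg));
  if (!CBS_get_asn1_int64(&cbs, &v) || v != -123) {
    return 0;
  }
  CBS_init(&cbs, kPadded, sizeof(kPadded));
  return CBS_get_asn1_uint64(&cbs, &u) && u == 255;
}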
int CBS_get_asn1_bool(CBS *cbs, int *out) {
CBS bytes;
if (!CBS_get_asn1(cbs, &bytes, CBS_ASN1_BOOLEAN) ||
CBS_len(&bytes) != 1) {
return 0;
}
const uint8_t value = *CBS_data(&bytes);
if (value != 0 && value != 0xff) {
return 0;
}
*out = !!value;
return 1;
}
int CBS_get_optional_asn1(CBS *cbs, CBS *out, int *out_present, unsigned tag) {
int present = 0;
if (CBS_peek_asn1_tag(cbs, tag)) {
if (!CBS_get_asn1(cbs, out, tag)) {
return 0;
}
present = 1;
}
if (out_present != NULL) {
*out_present = present;
}
return 1;
}
int CBS_get_optional_asn1_octet_string(CBS *cbs, CBS *out, int *out_present,
unsigned tag) {
CBS child;
int present;
if (!CBS_get_optional_asn1(cbs, &child, &present, tag)) {
return 0;
}
if (present) {
assert(out);
if (!CBS_get_asn1(&child, out, CBS_ASN1_OCTETSTRING) ||
CBS_len(&child) != 0) {
return 0;
}
} else {
CBS_init(out, NULL, 0);
}
if (out_present) {
*out_present = present;
}
return 1;
}
int CBS_get_optional_asn1_uint64(CBS *cbs, uint64_t *out, unsigned tag,
uint64_t default_value) {
CBS child;
int present;
if (!CBS_get_optional_asn1(cbs, &child, &present, tag)) {
return 0;
}
if (present) {
if (!CBS_get_asn1_uint64(&child, out) ||
CBS_len(&child) != 0) {
return 0;
}
} else {
*out = default_value;
}
return 1;
}
int CBS_get_optional_asn1_bool(CBS *cbs, int *out, unsigned tag,
int default_value) {
CBS child, child2;
int present;
if (!CBS_get_optional_asn1(cbs, &child, &present, tag)) {
return 0;
}
if (present) {
uint8_t boolean;
if (!CBS_get_asn1(&child, &child2, CBS_ASN1_BOOLEAN) ||
CBS_len(&child2) != 1 ||
CBS_len(&child) != 0) {
return 0;
}
boolean = CBS_data(&child2)[0];
if (boolean == 0) {
*out = 0;
} else if (boolean == 0xff) {
*out = 1;
} else {
return 0;
}
} else {
*out = default_value;
}
return 1;
}
int CBS_is_valid_asn1_bitstring(const CBS *cbs) {
CBS in = *cbs;
uint8_t num_unused_bits;
if (!CBS_get_u8(&in, &num_unused_bits) ||
num_unused_bits > 7) {
return 0;
}
if (num_unused_bits == 0) {
return 1;
}
// All num_unused_bits bits must exist and be zeros.
uint8_t last;
if (!CBS_get_last_u8(&in, &last) ||
(last & ((1 << num_unused_bits) - 1)) != 0) {
return 0;
}
return 1;
}
int CBS_asn1_bitstring_has_bit(const CBS *cbs, unsigned bit) {
if (!CBS_is_valid_asn1_bitstring(cbs)) {
return 0;
}
const unsigned byte_num = (bit >> 3) + 1;
const unsigned bit_num = 7 - (bit & 7);
// Unused bits are zero, and this function does not distinguish between
// missing and unset bits. Thus it is sufficient to do a byte-level length
// check.
return byte_num < CBS_len(cbs) &&
(CBS_data(cbs)[byte_num] & (1 << bit_num)) != 0;
}
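// Illustrative sketch (not part of the upstream file): BIT STRING contents
// begin with the unused-bit count, and bit 0 is the most significant bit of
// the first content byte. {0x07, 0x80} therefore has only bit 0 set, with
// the remaining seven bits of the byte marked unused.
static int example_bitstring_bit0(void) {
  static const uint8_t kBits[] = {0x07, 0x80};
  CBS cbs;
  CBS_init(&cbs, kBits, sizeof(kBits));
  return CBS_asn1_bitstring_has_bit(&cbs, 0) &&
         !CBS_asn1_bitstring_has_bit(&cbs, 1);
}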
static int add_decimal(CBB *out, uint64_t v) {
char buf[DECIMAL_SIZE(uint64_t) + 1];
BIO_snprintf(buf, sizeof(buf), "%" PRIu64, v);
return CBB_add_bytes(out, (const uint8_t *)buf, strlen(buf));
}
char *CBS_asn1_oid_to_text(const CBS *cbs) {
CBB cbb;
if (!CBB_init(&cbb, 32)) {
goto err;
}
CBS copy = *cbs;
// The first component is 40 * value1 + value2, where value1 is 0, 1, or 2.
uint64_t v;
if (!parse_base128_integer(&copy, &v)) {
goto err;
}
if (v >= 80) {
if (!CBB_add_bytes(&cbb, (const uint8_t *)"2.", 2) ||
!add_decimal(&cbb, v - 80)) {
goto err;
}
} else if (!add_decimal(&cbb, v / 40) ||
!CBB_add_u8(&cbb, '.') ||
!add_decimal(&cbb, v % 40)) {
goto err;
}
while (CBS_len(&copy) != 0) {
if (!parse_base128_integer(&copy, &v) ||
!CBB_add_u8(&cbb, '.') ||
!add_decimal(&cbb, v)) {
goto err;
}
}
uint8_t *txt;
size_t txt_len;
if (!CBB_add_u8(&cbb, '\0') ||
!CBB_finish(&cbb, &txt, &txt_len)) {
goto err;
}
return (char *)txt;
err:
CBB_cleanup(&cbb);
return NULL;
}

View File

@@ -0,0 +1,96 @@
/* Copyright (c) 2014, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#ifndef OPENSSL_HEADER_BYTESTRING_INTERNAL_H
#define OPENSSL_HEADER_BYTESTRING_INTERNAL_H
#include <CBigNumBoringSSL_base.h>
#if defined(__cplusplus)
extern "C" {
#endif
// CBS_asn1_ber_to_der reads a BER element from |in|. If it finds
// indefinite-length elements or constructed strings then it converts the BER
// data to DER, sets |out| to the converted contents and |*out_storage| to a
// buffer which the caller must release with |OPENSSL_free|. Otherwise, it sets
// |out| to the original BER element in |in| and |*out_storage| to NULL.
// Additionally, |*in| will be advanced over the BER element.
//
// This function should successfully process any valid BER input, however it
// will not convert all of BER's deviations from DER. BER is ambiguous between
// implicitly-tagged SEQUENCEs of strings and implicitly-tagged constructed
// strings. Implicitly-tagged strings must be parsed with
// |CBS_get_ber_implicitly_tagged_string| instead of |CBS_get_asn1|. The caller
// must also account for BER variations in the contents of a primitive.
//
// It returns one on success and zero otherwise.
OPENSSL_EXPORT int CBS_asn1_ber_to_der(CBS *in, CBS *out,
uint8_t **out_storage);
// CBS_get_asn1_implicit_string parses a BER string of primitive type
// |inner_tag| implicitly-tagged with |outer_tag|. It sets |out| to the
// contents. If concatenation was needed, it sets |*out_storage| to a buffer
// which the caller must release with |OPENSSL_free|. Otherwise, it sets
// |*out_storage| to NULL.
//
// This function does not parse all of BER. It requires the string be
// definite-length. Constructed strings are allowed, but all children of the
// outermost element must be primitive. The caller should use
// |CBS_asn1_ber_to_der| before running this function.
//
// It returns one on success and zero otherwise.
OPENSSL_EXPORT int CBS_get_asn1_implicit_string(CBS *in, CBS *out,
uint8_t **out_storage,
unsigned outer_tag,
unsigned inner_tag);
// CBB_finish_i2d calls |CBB_finish| on |cbb| which must have been initialized
// with |CBB_init|. If |outp| is not NULL then the result is written to |*outp|
// and |*outp| is advanced just past the output. It returns the number of bytes
// in the result, whether written or not, or a negative value on error. On
// error, it calls |CBB_cleanup| on |cbb|.
//
// This function may be used to help implement legacy i2d ASN.1 functions.
int CBB_finish_i2d(CBB *cbb, uint8_t **outp);
// Unicode utilities.
// The following functions read one Unicode code point from |cbs| with the
// corresponding encoding and store it in |*out|. They return one on success and
// zero on error.
OPENSSL_EXPORT int cbs_get_utf8(CBS *cbs, uint32_t *out);
OPENSSL_EXPORT int cbs_get_latin1(CBS *cbs, uint32_t *out);
OPENSSL_EXPORT int cbs_get_ucs2_be(CBS *cbs, uint32_t *out);
OPENSSL_EXPORT int cbs_get_utf32_be(CBS *cbs, uint32_t *out);
// cbb_get_utf8_len returns the number of bytes needed to represent |u| in
// UTF-8.
OPENSSL_EXPORT size_t cbb_get_utf8_len(uint32_t u);
// The following functions encode |u| to |cbb| with the corresponding
// encoding. They return one on success and zero on error.
OPENSSL_EXPORT int cbb_add_utf8(CBB *cbb, uint32_t u);
OPENSSL_EXPORT int cbb_add_latin1(CBB *cbb, uint32_t u);
OPENSSL_EXPORT int cbb_add_ucs2_be(CBB *cbb, uint32_t u);
OPENSSL_EXPORT int cbb_add_utf32_be(CBB *cbb, uint32_t u);
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_BYTESTRING_INTERNAL_H

View File

@@ -0,0 +1,155 @@
/* Copyright (c) 2018, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_bytestring.h>
#include "internal.h"
static int is_valid_code_point(uint32_t v) {
// References in the following are to Unicode 9.0.0.
if (// The Unicode space runs from zero to 0x10ffff (3.4 D9).
v > 0x10ffff ||
// Values 0x...fffe, 0x...ffff, and 0xfdd0-0xfdef are permanently reserved
// (3.4 D14)
(v & 0xfffe) == 0xfffe ||
(v >= 0xfdd0 && v <= 0xfdef) ||
// Surrogate code points are invalid (3.2 C1).
(v >= 0xd800 && v <= 0xdfff)) {
return 0;
}
return 1;
}
// BOTTOM_BITS returns a byte with the bottom |n| bits set.
#define BOTTOM_BITS(n) (uint8_t)((1u << (n)) - 1)
// TOP_BITS returns a byte with the top |n| bits set.
#define TOP_BITS(n) ((uint8_t)~BOTTOM_BITS(8 - (n)))
int cbs_get_utf8(CBS *cbs, uint32_t *out) {
uint8_t c;
if (!CBS_get_u8(cbs, &c)) {
return 0;
}
if (c <= 0x7f) {
*out = c;
return 1;
}
uint32_t v, lower_bound;
size_t len;
if ((c & TOP_BITS(3)) == TOP_BITS(2)) {
v = c & BOTTOM_BITS(5);
len = 1;
lower_bound = 0x80;
} else if ((c & TOP_BITS(4)) == TOP_BITS(3)) {
v = c & BOTTOM_BITS(4);
len = 2;
lower_bound = 0x800;
} else if ((c & TOP_BITS(5)) == TOP_BITS(4)) {
v = c & BOTTOM_BITS(3);
len = 3;
lower_bound = 0x10000;
} else {
return 0;
}
for (size_t i = 0; i < len; i++) {
if (!CBS_get_u8(cbs, &c) ||
(c & TOP_BITS(2)) != TOP_BITS(1)) {
return 0;
}
v <<= 6;
v |= c & BOTTOM_BITS(6);
}
if (!is_valid_code_point(v) ||
v < lower_bound) {
return 0;
}
*out = v;
return 1;
}
int cbs_get_latin1(CBS *cbs, uint32_t *out) {
uint8_t c;
if (!CBS_get_u8(cbs, &c)) {
return 0;
}
*out = c;
return 1;
}
int cbs_get_ucs2_be(CBS *cbs, uint32_t *out) {
// Note UCS-2 (used by BMPString) does not support surrogates.
uint16_t c;
if (!CBS_get_u16(cbs, &c) ||
!is_valid_code_point(c)) {
return 0;
}
*out = c;
return 1;
}
int cbs_get_utf32_be(CBS *cbs, uint32_t *out) {
return CBS_get_u32(cbs, out) && is_valid_code_point(*out);
}
size_t cbb_get_utf8_len(uint32_t u) {
if (u <= 0x7f) {
return 1;
}
if (u <= 0x7ff) {
return 2;
}
if (u <= 0xffff) {
return 3;
}
return 4;
}
int cbb_add_utf8(CBB *cbb, uint32_t u) {
if (!is_valid_code_point(u)) {
return 0;
}
if (u <= 0x7f) {
return CBB_add_u8(cbb, (uint8_t)u);
}
if (u <= 0x7ff) {
return CBB_add_u8(cbb, TOP_BITS(2) | (u >> 6)) &&
CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6)));
}
if (u <= 0xffff) {
return CBB_add_u8(cbb, TOP_BITS(3) | (u >> 12)) &&
CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) &&
CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6)));
}
if (u <= 0x10ffff) {
return CBB_add_u8(cbb, TOP_BITS(4) | (u >> 18)) &&
CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 12) & BOTTOM_BITS(6))) &&
CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) &&
CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6)));
}
return 0;
}
int cbb_add_latin1(CBB *cbb, uint32_t u) {
return u <= 0xff && CBB_add_u8(cbb, (uint8_t)u);
}
int cbb_add_ucs2_be(CBB *cbb, uint32_t u) {
return u <= 0xffff && is_valid_code_point(u) && CBB_add_u16(cbb, (uint16_t)u);
}
int cbb_add_utf32_be(CBB *cbb, uint32_t u) {
return is_valid_code_point(u) && CBB_add_u32(cbb, u);
}
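// Illustrative round-trip sketch (not part of the upstream file), assuming
// the extra <CBigNumBoringSSL_mem.h> include below for |OPENSSL_free|:
// U+00E9 encodes to the two bytes C3 A9, which matches |cbb_get_utf8_len|,
// and decodes back to the same code point.
#include <CBigNumBoringSSL_mem.h>

static int example_utf8_round_trip(void) {
  CBB cbb;
  uint8_t *buf;
  size_t len;
  uint32_t decoded;
  if (!CBB_init(&cbb, 4) ||
      !cbb_add_utf8(&cbb, 0x00e9) ||
      !CBB_finish(&cbb, &buf, &len)) {
    CBB_cleanup(&cbb);
    return 0;
  }
  CBS cbs;
  CBS_init(&cbs, buf, len);
  int ok = len == cbb_get_utf8_len(0x00e9) &&  // two bytes: 0xc3 0xa9
           cbs_get_utf8(&cbs, &decoded) &&
           decoded == 0x00e9;
  OPENSSL_free(buf);
  return ok;
}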

View File

@@ -0,0 +1,55 @@
/* Copyright (c) 2018, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_cpu.h>
#if defined(OPENSSL_AARCH64) && defined(OPENSSL_FUCHSIA) && \
!defined(OPENSSL_STATIC_ARMCAP)
#include <zircon/features.h>
#include <zircon/syscalls.h>
#include <zircon/types.h>
#include <CBigNumBoringSSL_arm_arch.h>
#include "internal.h"
extern uint32_t OPENSSL_armcap_P;
void OPENSSL_cpuid_setup(void) {
uint32_t hwcap;
zx_status_t rc = zx_system_get_features(ZX_FEATURE_KIND_CPU, &hwcap);
if (rc != ZX_OK || (hwcap & ZX_ARM64_FEATURE_ISA_ASIMD) == 0) {
// Matching OpenSSL, if NEON/ASIMD is missing, don't report other features
// either.
return;
}
OPENSSL_armcap_P |= ARMV7_NEON;
if (hwcap & ZX_ARM64_FEATURE_ISA_AES) {
OPENSSL_armcap_P |= ARMV8_AES;
}
if (hwcap & ZX_ARM64_FEATURE_ISA_PMULL) {
OPENSSL_armcap_P |= ARMV8_PMULL;
}
if (hwcap & ZX_ARM64_FEATURE_ISA_SHA1) {
OPENSSL_armcap_P |= ARMV8_SHA1;
}
if (hwcap & ZX_ARM64_FEATURE_ISA_SHA2) {
OPENSSL_armcap_P |= ARMV8_SHA256;
}
}
#endif // OPENSSL_AARCH64 && OPENSSL_FUCHSIA && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,62 @@
/* Copyright (c) 2016, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_cpu.h>
#if defined(OPENSSL_AARCH64) && defined(OPENSSL_LINUX) && \
!defined(OPENSSL_STATIC_ARMCAP)
#include <sys/auxv.h>
#include <CBigNumBoringSSL_arm_arch.h>
#include "internal.h"
extern uint32_t OPENSSL_armcap_P;
void OPENSSL_cpuid_setup(void) {
unsigned long hwcap = getauxval(AT_HWCAP);
// See /usr/include/asm/hwcap.h on an aarch64 installation for the source of
// these values.
static const unsigned long kNEON = 1 << 1;
static const unsigned long kAES = 1 << 3;
static const unsigned long kPMULL = 1 << 4;
static const unsigned long kSHA1 = 1 << 5;
static const unsigned long kSHA256 = 1 << 6;
if ((hwcap & kNEON) == 0) {
// Matching OpenSSL, if NEON is missing, don't report other features
// either.
return;
}
OPENSSL_armcap_P |= ARMV7_NEON;
if (hwcap & kAES) {
OPENSSL_armcap_P |= ARMV8_AES;
}
if (hwcap & kPMULL) {
OPENSSL_armcap_P |= ARMV8_PMULL;
}
if (hwcap & kSHA1) {
OPENSSL_armcap_P |= ARMV8_SHA1;
}
if (hwcap & kSHA256) {
OPENSSL_armcap_P |= ARMV8_SHA256;
}
}
#endif // OPENSSL_AARCH64 && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,218 @@
/* Copyright (c) 2016, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_cpu.h>
#if defined(OPENSSL_ARM) && !defined(OPENSSL_STATIC_ARMCAP)
#include <errno.h>
#include <fcntl.h>
#include <sys/types.h>
#include <unistd.h>
#include <CBigNumBoringSSL_arm_arch.h>
#include <CBigNumBoringSSL_mem.h>
#include "cpu-arm-linux.h"
#define AT_HWCAP 16
#define AT_HWCAP2 26
// |getauxval| is not available on Android until API level 20. Link it as a weak
// symbol and use other methods as fallback.
unsigned long getauxval(unsigned long type) __attribute__((weak));
static int open_eintr(const char *path, int flags) {
int ret;
do {
ret = open(path, flags);
} while (ret < 0 && errno == EINTR);
return ret;
}
static ssize_t read_eintr(int fd, void *out, size_t len) {
ssize_t ret;
do {
ret = read(fd, out, len);
} while (ret < 0 && errno == EINTR);
return ret;
}
// read_full reads exactly |len| bytes from |fd| to |out|. On error or end of
// file, it returns zero.
static int read_full(int fd, void *out, size_t len) {
char *outp = out;
while (len > 0) {
ssize_t ret = read_eintr(fd, outp, len);
if (ret <= 0) {
return 0;
}
outp += ret;
len -= ret;
}
return 1;
}
// read_file opens |path| and reads until end-of-file. On success, it returns
// one and sets |*out_ptr| and |*out_len| to a newly-allocated buffer with the
// contents. Otherwise, it returns zero.
static int read_file(char **out_ptr, size_t *out_len, const char *path) {
int fd = open_eintr(path, O_RDONLY);
if (fd < 0) {
return 0;
}
static const size_t kReadSize = 1024;
int ret = 0;
size_t cap = kReadSize, len = 0;
char *buf = OPENSSL_malloc(cap);
if (buf == NULL) {
goto err;
}
for (;;) {
if (cap - len < kReadSize) {
size_t new_cap = cap * 2;
if (new_cap < cap) {
goto err;
}
char *new_buf = OPENSSL_realloc(buf, new_cap);
if (new_buf == NULL) {
goto err;
}
buf = new_buf;
cap = new_cap;
}
ssize_t bytes_read = read_eintr(fd, buf + len, kReadSize);
if (bytes_read < 0) {
goto err;
}
if (bytes_read == 0) {
break;
}
len += bytes_read;
}
*out_ptr = buf;
*out_len = len;
ret = 1;
buf = NULL;
err:
OPENSSL_free(buf);
close(fd);
return ret;
}
// getauxval_proc behaves like |getauxval| but reads from /proc/self/auxv.
static unsigned long getauxval_proc(unsigned long type) {
int fd = open_eintr("/proc/self/auxv", O_RDONLY);
if (fd < 0) {
return 0;
}
struct {
unsigned long tag;
unsigned long value;
} entry;
for (;;) {
if (!read_full(fd, &entry, sizeof(entry)) ||
(entry.tag == 0 && entry.value == 0)) {
break;
}
if (entry.tag == type) {
close(fd);
return entry.value;
}
}
close(fd);
return 0;
}
extern uint32_t OPENSSL_armcap_P;
static int g_has_broken_neon, g_needs_hwcap2_workaround;
void OPENSSL_cpuid_setup(void) {
char *cpuinfo_data;
size_t cpuinfo_len;
if (!read_file(&cpuinfo_data, &cpuinfo_len, "/proc/cpuinfo")) {
return;
}
STRING_PIECE cpuinfo;
cpuinfo.data = cpuinfo_data;
cpuinfo.len = cpuinfo_len;
// |getauxval| is not available on Android until API level 20. If it is
// unavailable, read from /proc/self/auxv as a fallback. This is unreadable
// on some versions of Android, so further fall back to /proc/cpuinfo.
//
// See
// https://android.googlesource.com/platform/ndk/+/882ac8f3392858991a0e1af33b4b7387ec856bd2
// and b/13679666 (Google-internal) for details.
unsigned long hwcap = 0;
if (getauxval != NULL) {
hwcap = getauxval(AT_HWCAP);
}
if (hwcap == 0) {
hwcap = getauxval_proc(AT_HWCAP);
}
if (hwcap == 0) {
hwcap = crypto_get_arm_hwcap_from_cpuinfo(&cpuinfo);
}
// Clear NEON support if known broken.
g_has_broken_neon = crypto_cpuinfo_has_broken_neon(&cpuinfo);
if (g_has_broken_neon) {
hwcap &= ~HWCAP_NEON;
}
// Matching OpenSSL, only report other features if NEON is present.
if (hwcap & HWCAP_NEON) {
OPENSSL_armcap_P |= ARMV7_NEON;
// Some ARMv8 Android devices don't expose AT_HWCAP2. Fall back to
// /proc/cpuinfo. See https://crbug.com/596156.
unsigned long hwcap2 = 0;
if (getauxval != NULL) {
hwcap2 = getauxval(AT_HWCAP2);
}
if (hwcap2 == 0) {
hwcap2 = crypto_get_arm_hwcap2_from_cpuinfo(&cpuinfo);
g_needs_hwcap2_workaround = hwcap2 != 0;
}
if (hwcap2 & HWCAP2_AES) {
OPENSSL_armcap_P |= ARMV8_AES;
}
if (hwcap2 & HWCAP2_PMULL) {
OPENSSL_armcap_P |= ARMV8_PMULL;
}
if (hwcap2 & HWCAP2_SHA1) {
OPENSSL_armcap_P |= ARMV8_SHA1;
}
if (hwcap2 & HWCAP2_SHA2) {
OPENSSL_armcap_P |= ARMV8_SHA256;
}
}
OPENSSL_free(cpuinfo_data);
}
int CRYPTO_has_broken_NEON(void) { return g_has_broken_neon; }
int CRYPTO_needs_hwcap2_workaround(void) { return g_needs_hwcap2_workaround; }
#endif // OPENSSL_ARM && !OPENSSL_STATIC_ARMCAP

View File

@@ -0,0 +1,201 @@
/* Copyright (c) 2018, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#ifndef OPENSSL_HEADER_CRYPTO_CPU_ARM_LINUX_H
#define OPENSSL_HEADER_CRYPTO_CPU_ARM_LINUX_H
#include <CBigNumBoringSSL_base.h>
#include <string.h>
#include "internal.h"
#if defined(__cplusplus)
extern "C" {
#endif
// The cpuinfo parser lives in a header file so it may be accessible from
// cross-platform fuzzers without adding code to those platforms normally.
#define HWCAP_NEON (1 << 12)
// See /usr/include/asm/hwcap.h on an ARM installation for the source of
// these values.
#define HWCAP2_AES (1 << 0)
#define HWCAP2_PMULL (1 << 1)
#define HWCAP2_SHA1 (1 << 2)
#define HWCAP2_SHA2 (1 << 3)
typedef struct {
const char *data;
size_t len;
} STRING_PIECE;
static int STRING_PIECE_equals(const STRING_PIECE *a, const char *b) {
size_t b_len = strlen(b);
return a->len == b_len && OPENSSL_memcmp(a->data, b, b_len) == 0;
}
// STRING_PIECE_split finds the first occurrence of |sep| in |in| and, if found,
// sets |*out_left| and |*out_right| to |in| split before and after it. It
// returns one if |sep| was found and zero otherwise.
static int STRING_PIECE_split(STRING_PIECE *out_left, STRING_PIECE *out_right,
const STRING_PIECE *in, char sep) {
const char *p = (const char *)OPENSSL_memchr(in->data, sep, in->len);
if (p == NULL) {
return 0;
}
// |out_left| or |out_right| may alias |in|, so make a copy.
STRING_PIECE in_copy = *in;
out_left->data = in_copy.data;
out_left->len = p - in_copy.data;
out_right->data = in_copy.data + out_left->len + 1;
out_right->len = in_copy.len - out_left->len - 1;
return 1;
}
// STRING_PIECE_get_delimited reads a |sep|-delimited entry from |s|, writing it
// to |out| and updating |s| to point beyond it. It returns one on success and
// zero if |s| is empty. If |s| has no copies of |sep| and is non-empty, it
// reads the entire string to |out|.
static int STRING_PIECE_get_delimited(STRING_PIECE *s, STRING_PIECE *out, char sep) {
if (s->len == 0) {
return 0;
}
if (!STRING_PIECE_split(out, s, s, sep)) {
// |s| had no instances of |sep|. Return the entire string.
*out = *s;
s->data += s->len;
s->len = 0;
}
return 1;
}
// STRING_PIECE_trim removes leading and trailing whitespace from |s|.
static void STRING_PIECE_trim(STRING_PIECE *s) {
while (s->len != 0 && (s->data[0] == ' ' || s->data[0] == '\t')) {
s->data++;
s->len--;
}
while (s->len != 0 &&
(s->data[s->len - 1] == ' ' || s->data[s->len - 1] == '\t')) {
s->len--;
}
}
// extract_cpuinfo_field extracts a /proc/cpuinfo field named |field| from
// |in|. If found, it sets |*out| to the value and returns one. Otherwise, it
// returns zero.
static int extract_cpuinfo_field(STRING_PIECE *out, const STRING_PIECE *in,
const char *field) {
// Process |in| one line at a time.
STRING_PIECE remaining = *in, line;
while (STRING_PIECE_get_delimited(&remaining, &line, '\n')) {
STRING_PIECE key, value;
if (!STRING_PIECE_split(&key, &value, &line, ':')) {
continue;
}
STRING_PIECE_trim(&key);
if (STRING_PIECE_equals(&key, field)) {
STRING_PIECE_trim(&value);
*out = value;
return 1;
}
}
return 0;
}
static int cpuinfo_field_equals(const STRING_PIECE *cpuinfo, const char *field,
const char *value) {
STRING_PIECE extracted;
return extract_cpuinfo_field(&extracted, cpuinfo, field) &&
STRING_PIECE_equals(&extracted, value);
}
// has_list_item treats |list| as a space-separated list of items and returns
// one if |item| is contained in |list| and zero otherwise.
static int has_list_item(const STRING_PIECE *list, const char *item) {
STRING_PIECE remaining = *list, feature;
while (STRING_PIECE_get_delimited(&remaining, &feature, ' ')) {
if (STRING_PIECE_equals(&feature, item)) {
return 1;
}
}
return 0;
}
// crypto_get_arm_hwcap_from_cpuinfo returns an equivalent ARM |AT_HWCAP| value
// from |cpuinfo|.
static unsigned long crypto_get_arm_hwcap_from_cpuinfo(
const STRING_PIECE *cpuinfo) {
if (cpuinfo_field_equals(cpuinfo, "CPU architecture", "8")) {
// This is a 32-bit ARM binary running on a 64-bit kernel. NEON is always
// available on ARMv8. Linux omits required features, so reading the
// "Features" line does not work. (For simplicity, use strict equality. We
// assume everything running on future ARM architectures will have a
// working |getauxval|.)
return HWCAP_NEON;
}
STRING_PIECE features;
if (extract_cpuinfo_field(&features, cpuinfo, "Features") &&
has_list_item(&features, "neon")) {
return HWCAP_NEON;
}
return 0;
}
// crypto_get_arm_hwcap2_from_cpuinfo returns an equivalent ARM |AT_HWCAP2|
// value from |cpuinfo|.
static unsigned long crypto_get_arm_hwcap2_from_cpuinfo(
const STRING_PIECE *cpuinfo) {
STRING_PIECE features;
if (!extract_cpuinfo_field(&features, cpuinfo, "Features")) {
return 0;
}
unsigned long ret = 0;
if (has_list_item(&features, "aes")) {
ret |= HWCAP2_AES;
}
if (has_list_item(&features, "pmull")) {
ret |= HWCAP2_PMULL;
}
if (has_list_item(&features, "sha1")) {
ret |= HWCAP2_SHA1;
}
if (has_list_item(&features, "sha2")) {
ret |= HWCAP2_SHA2;
}
return ret;
}
// crypto_cpuinfo_has_broken_neon returns one if |cpuinfo| matches a CPU known
// to have a broken NEON unit and zero otherwise. See https://crbug.com/341598.
static int crypto_cpuinfo_has_broken_neon(const STRING_PIECE *cpuinfo) {
return cpuinfo_field_equals(cpuinfo, "CPU implementer", "0x51") &&
cpuinfo_field_equals(cpuinfo, "CPU architecture", "7") &&
cpuinfo_field_equals(cpuinfo, "CPU variant", "0x1") &&
cpuinfo_field_equals(cpuinfo, "CPU part", "0x04d") &&
cpuinfo_field_equals(cpuinfo, "CPU revision", "0");
}
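// Illustrative sketch (not part of the upstream header): feeding a canned,
// hypothetical /proc/cpuinfo fragment through the parser above. The Features
// line advertises AES and SHA-2 but not PMULL, so the expected result is
// HWCAP2_AES | HWCAP2_SHA2.
static unsigned long example_parse_cpuinfo_hwcap2(void) {
  static const char kCpuinfo[] =
      "processor\t: 0\n"
      "Features\t: half thumb fastmult vfp edsp neon aes sha2\n"
      "CPU architecture: 7\n";
  STRING_PIECE cpuinfo;
  cpuinfo.data = kCpuinfo;
  cpuinfo.len = sizeof(kCpuinfo) - 1;
  return crypto_get_arm_hwcap2_from_cpuinfo(&cpuinfo);
}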
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_CRYPTO_CPU_ARM_LINUX_H

View File

@@ -0,0 +1,38 @@
/* Copyright (c) 2014, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_cpu.h>
#if (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) && \
!defined(OPENSSL_STATIC_ARMCAP)
#include <CBigNumBoringSSL_arm_arch.h>
extern uint32_t OPENSSL_armcap_P;
char CRYPTO_is_NEON_capable_at_runtime(void) {
return (OPENSSL_armcap_P & ARMV7_NEON) != 0;
}
int CRYPTO_is_ARMv8_AES_capable(void) {
return (OPENSSL_armcap_P & ARMV8_AES) != 0;
}
int CRYPTO_is_ARMv8_PMULL_capable(void) {
return (OPENSSL_armcap_P & ARMV8_PMULL) != 0;
}
#endif /* (defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)) &&
!defined(OPENSSL_STATIC_ARMCAP) */

View File

@@ -0,0 +1,291 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_cpu.h>
#if !defined(OPENSSL_NO_ASM) && (defined(OPENSSL_X86) || defined(OPENSSL_X86_64))
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#if defined(_MSC_VER)
OPENSSL_MSVC_PRAGMA(warning(push, 3))
#include <immintrin.h>
#include <intrin.h>
OPENSSL_MSVC_PRAGMA(warning(pop))
#endif
#include "internal.h"
// OPENSSL_cpuid runs the cpuid instruction. |leaf| is passed in as EAX and ECX
// is set to zero. It writes EAX, EBX, ECX, and EDX to |*out_eax| through
// |*out_edx|.
static void OPENSSL_cpuid(uint32_t *out_eax, uint32_t *out_ebx,
uint32_t *out_ecx, uint32_t *out_edx, uint32_t leaf) {
#if defined(_MSC_VER)
int tmp[4];
__cpuid(tmp, (int)leaf);
*out_eax = (uint32_t)tmp[0];
*out_ebx = (uint32_t)tmp[1];
*out_ecx = (uint32_t)tmp[2];
*out_edx = (uint32_t)tmp[3];
#elif defined(__pic__) && defined(OPENSSL_32_BIT)
// Inline assembly may not clobber the PIC register. For 32-bit, this is EBX.
// See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=47602.
__asm__ volatile (
"xor %%ecx, %%ecx\n"
"mov %%ebx, %%edi\n"
"cpuid\n"
"xchg %%edi, %%ebx\n"
: "=a"(*out_eax), "=D"(*out_ebx), "=c"(*out_ecx), "=d"(*out_edx)
: "a"(leaf)
);
#else
__asm__ volatile (
"xor %%ecx, %%ecx\n"
"cpuid\n"
: "=a"(*out_eax), "=b"(*out_ebx), "=c"(*out_ecx), "=d"(*out_edx)
: "a"(leaf)
);
#endif
}
// OPENSSL_xgetbv returns the value of an Intel Extended Control Register (XCR).
// Currently only XCR0 is defined by Intel so |xcr| should always be zero.
static uint64_t OPENSSL_xgetbv(uint32_t xcr) {
#if defined(_MSC_VER)
return (uint64_t)_xgetbv(xcr);
#else
uint32_t eax, edx;
__asm__ volatile ("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
return (((uint64_t)edx) << 32) | eax;
#endif
}
// handle_cpu_env applies the value from |in| to the CPUID values in |out[0]|
// and |out[1]|. See the comment in |OPENSSL_cpuid_setup| about this.
static void handle_cpu_env(uint32_t *out, const char *in) {
const int invert = in[0] == '~';
const int or = in[0] == '|';
const int skip_first_byte = invert || or;
const int hex = in[skip_first_byte] == '0' && in[skip_first_byte+1] == 'x';
int sscanf_result;
uint64_t v;
if (hex) {
sscanf_result = sscanf(in + invert + 2, "%" PRIx64, &v);
} else {
sscanf_result = sscanf(in + invert, "%" PRIu64, &v);
}
if (!sscanf_result) {
return;
}
if (invert) {
out[0] &= ~v;
out[1] &= ~(v >> 32);
} else if (or) {
out[0] |= v;
out[1] |= (v >> 32);
} else {
out[0] = v;
out[1] = v >> 32;
}
}
void OPENSSL_cpuid_setup(void) {
// Determine the vendor and maximum input value.
uint32_t eax, ebx, ecx, edx;
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 0);
uint32_t num_ids = eax;
int is_intel = ebx == 0x756e6547 /* Genu */ &&
edx == 0x49656e69 /* ineI */ &&
ecx == 0x6c65746e /* ntel */;
int is_amd = ebx == 0x68747541 /* Auth */ &&
edx == 0x69746e65 /* enti */ &&
ecx == 0x444d4163 /* cAMD */;
uint32_t extended_features[2] = {0};
if (num_ids >= 7) {
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 7);
extended_features[0] = ebx;
extended_features[1] = ecx;
}
OPENSSL_cpuid(&eax, &ebx, &ecx, &edx, 1);
if (is_amd) {
// See https://www.amd.com/system/files/TechDocs/25481.pdf, page 10.
const uint32_t base_family = (eax >> 8) & 15;
const uint32_t base_model = (eax >> 4) & 15;
uint32_t family = base_family;
uint32_t model = base_model;
if (base_family == 0xf) {
const uint32_t ext_family = (eax >> 20) & 255;
family += ext_family;
const uint32_t ext_model = (eax >> 16) & 15;
model |= ext_model << 4;
}
if (family < 0x17 || (family == 0x17 && 0x70 <= model && model <= 0x7f)) {
// Disable RDRAND on AMD families before 0x17 (Zen) due to reported
// failures after suspend.
// https://bugzilla.redhat.com/show_bug.cgi?id=1150286
// Also disable for family 0x17, models 0x70-0x7f, due to possible RDRAND
// failures there too.
ecx &= ~(1u << 30);
}
}
// Force the hyper-threading bit so that the more conservative path is always
// chosen.
edx |= 1u << 28;
// Reserved bit #20 was historically repurposed to control the in-memory
// representation of RC4 state. Always set it to zero.
edx &= ~(1u << 20);
// Reserved bit #30 is repurposed to signal an Intel CPU.
if (is_intel) {
edx |= (1u << 30);
// Clear the XSAVE bit on Knights Landing to mimic Silvermont. This enables
// some Silvermont-specific codepaths which perform better. See OpenSSL
// commit 64d92d74985ebb3d0be58a9718f9e080a14a8e7f.
if ((eax & 0x0fff0ff0) == 0x00050670 /* Knights Landing */ ||
(eax & 0x0fff0ff0) == 0x00080650 /* Knights Mill (per SDE) */) {
ecx &= ~(1u << 26);
}
} else {
edx &= ~(1u << 30);
}
// The SDBG bit is repurposed to denote AMD XOP support. Don't ever use AMD
// XOP code paths.
ecx &= ~(1u << 11);
uint64_t xcr0 = 0;
if (ecx & (1u << 27)) {
// XCR0 may only be queried if the OSXSAVE bit is set.
xcr0 = OPENSSL_xgetbv(0);
}
// See Intel manual, volume 1, section 14.3.
if ((xcr0 & 6) != 6) {
// YMM registers cannot be used.
ecx &= ~(1u << 28); // AVX
ecx &= ~(1u << 12); // FMA
ecx &= ~(1u << 11); // AMD XOP
// Clear AVX2 and AVX512* bits.
//
// TODO(davidben): Should bits 17 and 26-28 also be cleared? Upstream
// doesn't clear those.
extended_features[0] &=
~((1u << 5) | (1u << 16) | (1u << 21) | (1u << 30) | (1u << 31));
}
// See Intel manual, volume 1, section 15.2.
if ((xcr0 & 0xe6) != 0xe6) {
// Clear AVX512F. Note we don't touch other AVX512 extensions because they
// can be used with YMM.
extended_features[0] &= ~(1u << 16);
}
// Disable ADX instructions on Knights Landing. See OpenSSL commit
// 64d92d74985ebb3d0be58a9718f9e080a14a8e7f.
if ((ecx & (1u << 26)) == 0) {
extended_features[0] &= ~(1u << 19);
}
OPENSSL_ia32cap_P[0] = edx;
OPENSSL_ia32cap_P[1] = ecx;
OPENSSL_ia32cap_P[2] = extended_features[0];
OPENSSL_ia32cap_P[3] = extended_features[1];
const char *env1, *env2;
env1 = getenv("OPENSSL_ia32cap");
if (env1 == NULL) {
return;
}
// OPENSSL_ia32cap can contain zero, one or two values, separated with a ':'.
// Each value is a 64-bit, unsigned value which may start with "0x" to
// indicate a hex value. Prior to the 64-bit value, a '~' or '|' may be given.
//
// If the '~' prefix is present:
// the value is inverted and ANDed with the probed CPUID result
// If the '|' prefix is present:
// the value is ORed with the probed CPUID result
// Otherwise:
// the value is taken as the result of the CPUID
//
// The first value determines OPENSSL_ia32cap_P[0] and [1]. The second [2]
// and [3].
handle_cpu_env(&OPENSSL_ia32cap_P[0], env1);
env2 = strchr(env1, ':');
if (env2 != NULL) {
handle_cpu_env(&OPENSSL_ia32cap_P[2], env2 + 1);
}
}
#endif // !OPENSSL_NO_ASM && (OPENSSL_X86 || OPENSSL_X86_64)
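/*
 * Illustrative sketch, not part of the vendored file: how the
 * OPENSSL_ia32cap override described above maps onto OPENSSL_ia32cap_P.
 * The first 64-bit value covers words [0] (EDX) and [1] (ECX), so the AVX
 * bit (ECX bit 28) is bit 60 of the combined value. Setting
 * OPENSSL_ia32cap=~0x1000000000000000 in the environment therefore clears
 * AVX after probing, equivalent to the masking below.
 */
static void example_clear_avx(uint32_t caps[4]) {
  uint64_t mask = (uint64_t)1 << 60;   // AVX in the combined [0]/[1] view
  caps[0] &= ~(uint32_t)mask;          // EDX word: mask's low half is zero
  caps[1] &= ~(uint32_t)(mask >> 32);  // ECX word: clears bit 28 (AVX)
}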

View File

@ -0,0 +1,38 @@
/* Copyright (c) 2016, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_cpu.h>
#if defined(OPENSSL_PPC64LE)
#include <sys/auxv.h>
#include "internal.h"
#if !defined(PPC_FEATURE2_HAS_VCRYPTO)
// PPC_FEATURE2_HAS_VCRYPTO was taken from section 4.1.2.3 of the “OpenPOWER
// ABI for Linux Supplement”.
#define PPC_FEATURE2_HAS_VCRYPTO 0x02000000
#endif
void OPENSSL_cpuid_setup(void) {
OPENSSL_ppc64le_hwcap2 = getauxval(AT_HWCAP2);
}
int CRYPTO_is_PPC64LE_vcrypto_capable(void) {
return (OPENSSL_ppc64le_hwcap2 & PPC_FEATURE2_HAS_VCRYPTO) != 0;
}
#endif // OPENSSL_PPC64LE

View File

@ -0,0 +1,215 @@
/* Copyright (c) 2014, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_crypto.h>
#include <CBigNumBoringSSL_cpu.h>
#include "internal.h"
#if !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_STATIC_ARMCAP) && \
(defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64) || \
defined(OPENSSL_PPC64LE))
// x86, x86_64, the ARMs and ppc64le need to record the result of a
// cpuid/getauxval call for the asm to work correctly, unless compiled without
// asm code.
#define NEED_CPUID
#else
// Otherwise, don't emit a static initialiser.
#if !defined(BORINGSSL_NO_STATIC_INITIALIZER)
#define BORINGSSL_NO_STATIC_INITIALIZER
#endif
#endif // !NO_ASM && !STATIC_ARMCAP &&
// (X86 || X86_64 || ARM || AARCH64 || PPC64LE)
// Our assembly does not use the GOT to reference symbols, which means
// references to visible symbols will often require a TEXTREL. This is
// undesirable, so all assembly-referenced symbols should be hidden. CPU
// capabilities are the only such symbols defined in C. Explicitly hide them,
// rather than rely on being built with -fvisibility=hidden.
#if defined(OPENSSL_WINDOWS)
#define HIDDEN
#else
#define HIDDEN __attribute__((visibility("hidden")))
#endif
// The capability variables are defined in this file in order to work around a
// linker bug. When linking with a .a, if no symbols in a .o are referenced
// then the .o is discarded, even if it has constructor functions.
//
// This still means that any binaries that don't include some functionality
// that tests the capability values will still skip the constructor but, so
// far, the init constructor function only sets the capability variables.
#if defined(BORINGSSL_DISPATCH_TEST)
// This value must be explicitly initialised to zero in order to work around a
// bug in libtool or the linker on OS X.
//
// If not initialised then it becomes a "common symbol". When put into an
// archive, linking on OS X will fail to resolve common symbols. By
// initialising it to zero, it becomes a "data symbol", which isn't so
// affected.
HIDDEN uint8_t BORINGSSL_function_hit[7] = {0};
#endif
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
// This value must be explicitly initialized to zero. See similar comment above.
HIDDEN uint32_t OPENSSL_ia32cap_P[4] = {0};
#elif defined(OPENSSL_PPC64LE)
HIDDEN unsigned long OPENSSL_ppc64le_hwcap2 = 0;
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#include <CBigNumBoringSSL_arm_arch.h>
#if defined(OPENSSL_STATIC_ARMCAP)
HIDDEN uint32_t OPENSSL_armcap_P =
#if defined(OPENSSL_STATIC_ARMCAP_NEON) || \
(defined(__ARM_NEON__) || defined(__ARM_NEON))
ARMV7_NEON |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_AES) || defined(__ARM_FEATURE_CRYPTO)
ARMV8_AES |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_SHA1) || defined(__ARM_FEATURE_CRYPTO)
ARMV8_SHA1 |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_SHA256) || defined(__ARM_FEATURE_CRYPTO)
ARMV8_SHA256 |
#endif
#if defined(OPENSSL_STATIC_ARMCAP_PMULL) || defined(__ARM_FEATURE_CRYPTO)
ARMV8_PMULL |
#endif
0;
#else
HIDDEN uint32_t OPENSSL_armcap_P = 0;
uint32_t *OPENSSL_get_armcap_pointer_for_test(void) {
return &OPENSSL_armcap_P;
}
#endif
#endif
#if defined(BORINGSSL_FIPS)
// In FIPS mode, the power-on self-test function calls |CRYPTO_library_init|
// because we have to ensure that CPUID detection occurs first.
#define BORINGSSL_NO_STATIC_INITIALIZER
#endif
#if defined(OPENSSL_WINDOWS) && !defined(BORINGSSL_NO_STATIC_INITIALIZER)
#define OPENSSL_CDECL __cdecl
#else
#define OPENSSL_CDECL
#endif
#if defined(BORINGSSL_NO_STATIC_INITIALIZER)
static CRYPTO_once_t once = CRYPTO_ONCE_INIT;
#elif defined(_MSC_VER)
#pragma section(".CRT$XCU", read)
static void __cdecl do_library_init(void);
__declspec(allocate(".CRT$XCU")) void(*library_init_constructor)(void) =
do_library_init;
#else
static void do_library_init(void) __attribute__ ((constructor));
#endif
// do_library_init is the actual initialization function. If
// BORINGSSL_NO_STATIC_INITIALIZER isn't defined, this is set as a static
// initializer. Otherwise, it is called by CRYPTO_library_init.
static void OPENSSL_CDECL do_library_init(void) {
// WARNING: this function may only configure the capability variables. See the
// note above about the linker bug.
#if defined(NEED_CPUID)
OPENSSL_cpuid_setup();
#endif
}
void CRYPTO_library_init(void) {
// TODO(davidben): It would be tidier if this build knob could be replaced
// with an internal lazy-init mechanism that would handle things correctly
// in-library. https://crbug.com/542879
#if defined(BORINGSSL_NO_STATIC_INITIALIZER)
CRYPTO_once(&once, do_library_init);
#endif
}
int CRYPTO_is_confidential_build(void) {
#if defined(BORINGSSL_CONFIDENTIAL)
return 1;
#else
return 0;
#endif
}
int CRYPTO_has_asm(void) {
#if defined(OPENSSL_NO_ASM)
return 0;
#else
return 1;
#endif
}
const char *SSLeay_version(int which) { return OpenSSL_version(which); }
const char *OpenSSL_version(int which) {
switch (which) {
case OPENSSL_VERSION:
return "BoringSSL";
case OPENSSL_CFLAGS:
return "compiler: n/a";
case OPENSSL_BUILT_ON:
return "built on: n/a";
case OPENSSL_PLATFORM:
return "platform: n/a";
case OPENSSL_DIR:
return "OPENSSLDIR: n/a";
default:
return "not available";
}
}
unsigned long SSLeay(void) { return OPENSSL_VERSION_NUMBER; }
unsigned long OpenSSL_version_num(void) { return OPENSSL_VERSION_NUMBER; }
int CRYPTO_malloc_init(void) { return 1; }
int OPENSSL_malloc_init(void) { return 1; }
void ENGINE_load_builtin_engines(void) {}
int ENGINE_register_all_complete(void) { return 1; }
void OPENSSL_load_builtin_modules(void) {}
int OPENSSL_init_crypto(uint64_t opts, const OPENSSL_INIT_SETTINGS *settings) {
CRYPTO_library_init();
return 1;
}
void OPENSSL_cleanup(void) {}
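/*
 * Minimal usage sketch (illustrative; not part of the vendored file): when
 * built with BORINGSSL_NO_STATIC_INITIALIZER the capability variables are
 * only set once CRYPTO_library_init runs, so applications call it (or the
 * OpenSSL-compatible OPENSSL_init_crypto) before any other crypto use.
 */
static void example_library_init(void) {
  CRYPTO_library_init();         // no-op or CRYPTO_once-guarded init, safe to repeat
  OPENSSL_init_crypto(0, NULL);  // compatibility shim, also runs the init
}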

View File

@ -0,0 +1,850 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/
/* ====================================================================
* Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_err.h>
#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <string.h>
#if defined(OPENSSL_WINDOWS)
OPENSSL_MSVC_PRAGMA(warning(push, 3))
#include <windows.h>
OPENSSL_MSVC_PRAGMA(warning(pop))
#endif
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_thread.h>
#include "../internal.h"
#include "./internal.h"
struct err_error_st {
// file contains the filename where the error occurred.
const char *file;
// data contains a NUL-terminated string with optional data. It must be freed
// with |OPENSSL_free|.
char *data;
// packed contains the error library and reason, as packed by ERR_PACK.
uint32_t packed;
// line contains the line number where the error occurred.
uint16_t line;
// mark indicates a reversion point in the queue. See |ERR_pop_to_mark|.
unsigned mark : 1;
};
// ERR_STATE contains the per-thread, error queue.
typedef struct err_state_st {
// errors contains the ERR_NUM_ERRORS most recent errors, organised as a ring
// buffer.
struct err_error_st errors[ERR_NUM_ERRORS];
// top contains the index one past the most recent error. If |top| equals
// |bottom| then the queue is empty.
unsigned top;
// bottom contains the index of the last error in the queue.
unsigned bottom;
// to_free, if not NULL, contains a pointer owned by this structure that was
// previously a |data| pointer of one of the elements of |errors|.
void *to_free;
} ERR_STATE;
extern const uint32_t kOpenSSLReasonValues[];
extern const size_t kOpenSSLReasonValuesLen;
extern const char kOpenSSLReasonStringData[];
// err_clear clears the given queued error.
static void err_clear(struct err_error_st *error) {
OPENSSL_free(error->data);
OPENSSL_memset(error, 0, sizeof(struct err_error_st));
}
static void err_copy(struct err_error_st *dst, const struct err_error_st *src) {
err_clear(dst);
dst->file = src->file;
if (src->data != NULL) {
dst->data = OPENSSL_strdup(src->data);
}
dst->packed = src->packed;
dst->line = src->line;
}
// global_next_library contains the next custom library value to return.
static int global_next_library = ERR_NUM_LIBS;
// global_next_library_mutex protects |global_next_library| from concurrent
// updates.
static struct CRYPTO_STATIC_MUTEX global_next_library_mutex =
CRYPTO_STATIC_MUTEX_INIT;
static void err_state_free(void *statep) {
ERR_STATE *state = statep;
if (state == NULL) {
return;
}
for (unsigned i = 0; i < ERR_NUM_ERRORS; i++) {
err_clear(&state->errors[i]);
}
OPENSSL_free(state->to_free);
OPENSSL_free(state);
}
// err_get_state gets the ERR_STATE object for the current thread.
static ERR_STATE *err_get_state(void) {
ERR_STATE *state = CRYPTO_get_thread_local(OPENSSL_THREAD_LOCAL_ERR);
if (state == NULL) {
state = OPENSSL_malloc(sizeof(ERR_STATE));
if (state == NULL) {
return NULL;
}
OPENSSL_memset(state, 0, sizeof(ERR_STATE));
if (!CRYPTO_set_thread_local(OPENSSL_THREAD_LOCAL_ERR, state,
err_state_free)) {
return NULL;
}
}
return state;
}
static uint32_t get_error_values(int inc, int top, const char **file, int *line,
const char **data, int *flags) {
unsigned i = 0;
ERR_STATE *state;
struct err_error_st *error;
uint32_t ret;
state = err_get_state();
if (state == NULL || state->bottom == state->top) {
return 0;
}
if (top) {
assert(!inc);
// last error
i = state->top;
} else {
i = (state->bottom + 1) % ERR_NUM_ERRORS;
}
error = &state->errors[i];
ret = error->packed;
if (file != NULL && line != NULL) {
if (error->file == NULL) {
*file = "NA";
*line = 0;
} else {
*file = error->file;
*line = error->line;
}
}
if (data != NULL) {
if (error->data == NULL) {
*data = "";
if (flags != NULL) {
*flags = 0;
}
} else {
*data = error->data;
if (flags != NULL) {
*flags = ERR_FLAG_STRING;
}
// If this error is being removed, take ownership of data from
// the error. The semantics are such that the caller doesn't
// take ownership either. Instead the error system takes
// ownership and retains it until the next call that affects the
// error queue.
if (inc) {
if (error->data != NULL) {
OPENSSL_free(state->to_free);
state->to_free = error->data;
}
error->data = NULL;
}
}
}
if (inc) {
assert(!top);
err_clear(error);
state->bottom = i;
}
return ret;
}
uint32_t ERR_get_error(void) {
return get_error_values(1 /* inc */, 0 /* bottom */, NULL, NULL, NULL, NULL);
}
uint32_t ERR_get_error_line(const char **file, int *line) {
return get_error_values(1 /* inc */, 0 /* bottom */, file, line, NULL, NULL);
}
uint32_t ERR_get_error_line_data(const char **file, int *line,
const char **data, int *flags) {
return get_error_values(1 /* inc */, 0 /* bottom */, file, line, data, flags);
}
uint32_t ERR_peek_error(void) {
return get_error_values(0 /* peek */, 0 /* bottom */, NULL, NULL, NULL, NULL);
}
uint32_t ERR_peek_error_line(const char **file, int *line) {
return get_error_values(0 /* peek */, 0 /* bottom */, file, line, NULL, NULL);
}
uint32_t ERR_peek_error_line_data(const char **file, int *line,
const char **data, int *flags) {
return get_error_values(0 /* peek */, 0 /* bottom */, file, line, data,
flags);
}
uint32_t ERR_peek_last_error(void) {
return get_error_values(0 /* peek */, 1 /* top */, NULL, NULL, NULL, NULL);
}
uint32_t ERR_peek_last_error_line(const char **file, int *line) {
return get_error_values(0 /* peek */, 1 /* top */, file, line, NULL, NULL);
}
uint32_t ERR_peek_last_error_line_data(const char **file, int *line,
const char **data, int *flags) {
return get_error_values(0 /* peek */, 1 /* top */, file, line, data, flags);
}
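/*
 * Usage sketch of the queue accessors above (illustrative; not part of the
 * vendored file). ERR_put_error is defined later in this file; ERR_LIB_SYS
 * with reason 0 records errno.
 */
static void example_error_queue(void) {
  ERR_put_error(ERR_LIB_SYS, 0 /* unused */, 0 /* errno */, __FILE__, __LINE__);
  const char *file, *data;
  int line, flags;
  uint32_t packed = ERR_peek_error();  // inspect without removing
  packed = ERR_get_error_line_data(&file, &line, &data, &flags);  // remove oldest
  (void)packed;
  ERR_clear_error();  // discard anything still queued
}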
void ERR_clear_error(void) {
ERR_STATE *const state = err_get_state();
unsigned i;
if (state == NULL) {
return;
}
for (i = 0; i < ERR_NUM_ERRORS; i++) {
err_clear(&state->errors[i]);
}
OPENSSL_free(state->to_free);
state->to_free = NULL;
state->top = state->bottom = 0;
}
void ERR_remove_thread_state(const CRYPTO_THREADID *tid) {
if (tid != NULL) {
assert(0);
return;
}
ERR_clear_error();
}
int ERR_get_next_error_library(void) {
int ret;
CRYPTO_STATIC_MUTEX_lock_write(&global_next_library_mutex);
ret = global_next_library++;
CRYPTO_STATIC_MUTEX_unlock_write(&global_next_library_mutex);
return ret;
}
void ERR_remove_state(unsigned long pid) {
ERR_clear_error();
}
void ERR_clear_system_error(void) {
errno = 0;
}
char *ERR_error_string(uint32_t packed_error, char *ret) {
static char buf[ERR_ERROR_STRING_BUF_LEN];
if (ret == NULL) {
// TODO(fork): remove this.
ret = buf;
}
#if !defined(NDEBUG)
// This is aimed to help catch callers who don't provide
// |ERR_ERROR_STRING_BUF_LEN| bytes of space.
OPENSSL_memset(ret, 0, ERR_ERROR_STRING_BUF_LEN);
#endif
return ERR_error_string_n(packed_error, ret, ERR_ERROR_STRING_BUF_LEN);
}
char *ERR_error_string_n(uint32_t packed_error, char *buf, size_t len) {
char lib_buf[64], reason_buf[64];
const char *lib_str, *reason_str;
unsigned lib, reason;
if (len == 0) {
return NULL;
}
lib = ERR_GET_LIB(packed_error);
reason = ERR_GET_REASON(packed_error);
lib_str = ERR_lib_error_string(packed_error);
reason_str = ERR_reason_error_string(packed_error);
if (lib_str == NULL) {
BIO_snprintf(lib_buf, sizeof(lib_buf), "lib(%u)", lib);
lib_str = lib_buf;
}
if (reason_str == NULL) {
BIO_snprintf(reason_buf, sizeof(reason_buf), "reason(%u)", reason);
reason_str = reason_buf;
}
BIO_snprintf(buf, len, "error:%08" PRIx32 ":%s:OPENSSL_internal:%s",
packed_error, lib_str, reason_str);
if (strlen(buf) == len - 1) {
// output may be truncated; make sure we always have 5 colon-separated
// fields, i.e. 4 colons.
static const unsigned num_colons = 4;
unsigned i;
char *s = buf;
if (len <= num_colons) {
// In this situation it's not possible to ensure that the correct number
// of colons are included in the output.
return buf;
}
for (i = 0; i < num_colons; i++) {
char *colon = strchr(s, ':');
char *last_pos = &buf[len - 1] - num_colons + i;
if (colon == NULL || colon > last_pos) {
// set colon |i| at last possible position (buf[len-1] is the
// terminating 0). If we're setting this colon, then the whole of the
// rest of the string must be colons in order to have the correct
// number.
OPENSSL_memset(last_pos, ':', num_colons - i);
break;
}
s = colon + 1;
}
}
return buf;
}
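/*
 * Formatting sketch (illustrative; not part of the vendored file): callers
 * should provide ERR_ERROR_STRING_BUF_LEN bytes. The result always carries
 * the five colon-separated fields described above, even when truncated.
 */
static void example_format_error(uint32_t packed_error) {
  char buf[ERR_ERROR_STRING_BUF_LEN];
  ERR_error_string_n(packed_error, buf, sizeof(buf));
  // buf now holds "error:<hex>:<library>:OPENSSL_internal:<reason>"
}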
// err_string_cmp is a compare function for searching error values with
// |bsearch| in |err_string_lookup|.
static int err_string_cmp(const void *a, const void *b) {
const uint32_t a_key = *((const uint32_t*) a) >> 15;
const uint32_t b_key = *((const uint32_t*) b) >> 15;
if (a_key < b_key) {
return -1;
} else if (a_key > b_key) {
return 1;
} else {
return 0;
}
}
// err_string_lookup looks up the string associated with |lib| and |key| in
// |values| and |string_data|. It returns the string or NULL if not found.
static const char *err_string_lookup(uint32_t lib, uint32_t key,
const uint32_t *values,
size_t num_values,
const char *string_data) {
// |values| points to data in err_data.h, which is generated by
// err_data_generate.go. It's an array of uint32_t values. Each value has the
// following structure:
// | lib | key | offset |
// |6 bits| 11 bits | 15 bits |
//
// The |lib| value is a library identifier: one of the |ERR_LIB_*| values.
// The |key| is a reason code, depending on the context.
// The |offset| is the number of bytes from the start of |string_data| where
// the (NUL terminated) string for this value can be found.
//
// Values are sorted based on treating the |lib| and |key| part as an
// unsigned integer.
if (lib >= (1 << 6) || key >= (1 << 11)) {
return NULL;
}
uint32_t search_key = lib << 26 | key << 15;
const uint32_t *result = bsearch(&search_key, values, num_values,
sizeof(uint32_t), err_string_cmp);
if (result == NULL) {
return NULL;
}
return &string_data[(*result) & 0x7fff];
}
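/*
 * Sketch of the packed lookup value documented above (illustrative only):
 * 6 bits of library, 11 bits of reason key, 15 bits of string offset. The
 * search key used by err_string_lookup leaves the offset bits zero.
 */
static uint32_t example_pack_reason_value(uint32_t lib, uint32_t key,
                                          uint32_t offset) {
  // Caller must ensure lib < 64, key < 2048 and offset < 32768.
  return (lib << 26) | (key << 15) | offset;
}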
static const char *const kLibraryNames[ERR_NUM_LIBS] = {
"invalid library (0)",
"unknown library", // ERR_LIB_NONE
"system library", // ERR_LIB_SYS
"bignum routines", // ERR_LIB_BN
"RSA routines", // ERR_LIB_RSA
"Diffie-Hellman routines", // ERR_LIB_DH
"public key routines", // ERR_LIB_EVP
"memory buffer routines", // ERR_LIB_BUF
"object identifier routines", // ERR_LIB_OBJ
"PEM routines", // ERR_LIB_PEM
"DSA routines", // ERR_LIB_DSA
"X.509 certificate routines", // ERR_LIB_X509
"ASN.1 encoding routines", // ERR_LIB_ASN1
"configuration file routines", // ERR_LIB_CONF
"common libcrypto routines", // ERR_LIB_CRYPTO
"elliptic curve routines", // ERR_LIB_EC
"SSL routines", // ERR_LIB_SSL
"BIO routines", // ERR_LIB_BIO
"PKCS7 routines", // ERR_LIB_PKCS7
"PKCS8 routines", // ERR_LIB_PKCS8
"X509 V3 routines", // ERR_LIB_X509V3
"random number generator", // ERR_LIB_RAND
"ENGINE routines", // ERR_LIB_ENGINE
"OCSP routines", // ERR_LIB_OCSP
"UI routines", // ERR_LIB_UI
"COMP routines", // ERR_LIB_COMP
"ECDSA routines", // ERR_LIB_ECDSA
"ECDH routines", // ERR_LIB_ECDH
"HMAC routines", // ERR_LIB_HMAC
"Digest functions", // ERR_LIB_DIGEST
"Cipher functions", // ERR_LIB_CIPHER
"HKDF functions", // ERR_LIB_HKDF
"Trust Token functions", // ERR_LIB_TRUST_TOKEN
"User defined functions", // ERR_LIB_USER
};
const char *ERR_lib_error_string(uint32_t packed_error) {
const uint32_t lib = ERR_GET_LIB(packed_error);
if (lib >= ERR_NUM_LIBS) {
return NULL;
}
return kLibraryNames[lib];
}
const char *ERR_func_error_string(uint32_t packed_error) {
return "OPENSSL_internal";
}
const char *ERR_reason_error_string(uint32_t packed_error) {
const uint32_t lib = ERR_GET_LIB(packed_error);
const uint32_t reason = ERR_GET_REASON(packed_error);
if (lib == ERR_LIB_SYS) {
if (reason < 127) {
return strerror(reason);
}
return NULL;
}
if (reason < ERR_NUM_LIBS) {
return kLibraryNames[reason];
}
if (reason < 100) {
switch (reason) {
case ERR_R_MALLOC_FAILURE:
return "malloc failure";
case ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED:
return "function should not have been called";
case ERR_R_PASSED_NULL_PARAMETER:
return "passed a null parameter";
case ERR_R_INTERNAL_ERROR:
return "internal error";
case ERR_R_OVERFLOW:
return "overflow";
default:
return NULL;
}
}
return err_string_lookup(lib, reason, kOpenSSLReasonValues,
kOpenSSLReasonValuesLen, kOpenSSLReasonStringData);
}
void ERR_print_errors_cb(ERR_print_errors_callback_t callback, void *ctx) {
char buf[ERR_ERROR_STRING_BUF_LEN];
char buf2[1024];
const char *file, *data;
int line, flags;
uint32_t packed_error;
// thread_hash is the least-significant bits of the |ERR_STATE| pointer value
// for this thread.
const unsigned long thread_hash = (uintptr_t) err_get_state();
for (;;) {
packed_error = ERR_get_error_line_data(&file, &line, &data, &flags);
if (packed_error == 0) {
break;
}
ERR_error_string_n(packed_error, buf, sizeof(buf));
BIO_snprintf(buf2, sizeof(buf2), "%lu:%s:%s:%d:%s\n", thread_hash, buf,
file, line, (flags & ERR_FLAG_STRING) ? data : "");
if (callback(buf2, strlen(buf2), ctx) <= 0) {
break;
}
}
}
static int print_errors_to_file(const char* msg, size_t msg_len, void* ctx) {
assert(msg[msg_len] == '\0');
FILE* fp = ctx;
int res = fputs(msg, fp);
return res < 0 ? 0 : 1;
}
void ERR_print_errors_fp(FILE *file) {
ERR_print_errors_cb(print_errors_to_file, file);
}
// err_set_error_data sets the data on the most recent error.
static void err_set_error_data(char *data) {
ERR_STATE *const state = err_get_state();
struct err_error_st *error;
if (state == NULL || state->top == state->bottom) {
OPENSSL_free(data);
return;
}
error = &state->errors[state->top];
OPENSSL_free(error->data);
error->data = data;
}
void ERR_put_error(int library, int unused, int reason, const char *file,
unsigned line) {
ERR_STATE *const state = err_get_state();
struct err_error_st *error;
if (state == NULL) {
return;
}
if (library == ERR_LIB_SYS && reason == 0) {
#if defined(OPENSSL_WINDOWS)
reason = GetLastError();
#else
reason = errno;
#endif
}
state->top = (state->top + 1) % ERR_NUM_ERRORS;
if (state->top == state->bottom) {
state->bottom = (state->bottom + 1) % ERR_NUM_ERRORS;
}
error = &state->errors[state->top];
err_clear(error);
error->file = file;
error->line = line;
error->packed = ERR_PACK(library, reason);
}
// ERR_add_error_data_vdata takes a variable number of const char* pointers,
// concatenates them and sets the result as the data on the most recent
// error.
static void err_add_error_vdata(unsigned num, va_list args) {
size_t alloced, new_len, len = 0, substr_len;
char *buf;
const char *substr;
unsigned i;
alloced = 80;
buf = OPENSSL_malloc(alloced + 1);
if (buf == NULL) {
return;
}
for (i = 0; i < num; i++) {
substr = va_arg(args, const char *);
if (substr == NULL) {
continue;
}
substr_len = strlen(substr);
new_len = len + substr_len;
if (new_len > alloced) {
char *new_buf;
if (alloced + 20 + 1 < alloced) {
// overflow.
OPENSSL_free(buf);
return;
}
alloced = new_len + 20;
new_buf = OPENSSL_realloc(buf, alloced + 1);
if (new_buf == NULL) {
OPENSSL_free(buf);
return;
}
buf = new_buf;
}
OPENSSL_memcpy(buf + len, substr, substr_len);
len = new_len;
}
buf[len] = 0;
err_set_error_data(buf);
}
void ERR_add_error_data(unsigned count, ...) {
va_list args;
va_start(args, count);
err_add_error_vdata(count, args);
va_end(args);
}
void ERR_add_error_dataf(const char *format, ...) {
va_list ap;
char *buf;
static const unsigned buf_len = 256;
// A fixed-size buffer is used because va_copy (which would be needed in
// order to call vsnprintf twice and measure the buffer) wasn't defined until
// C99.
buf = OPENSSL_malloc(buf_len + 1);
if (buf == NULL) {
return;
}
va_start(ap, format);
BIO_vsnprintf(buf, buf_len, format, ap);
buf[buf_len] = 0;
va_end(ap);
err_set_error_data(buf);
}
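/*
 * Usage sketch (illustrative; not part of the vendored file): attach extra
 * detail to the most recently queued error. The formatted data is capped at
 * 256 bytes as noted above.
 */
static void example_add_error_detail(int saved_errno) {
  ERR_put_error(ERR_LIB_SYS, 0, saved_errno, __FILE__, __LINE__);
  ERR_add_error_dataf("open failed (errno %d)", saved_errno);
}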
int ERR_set_mark(void) {
ERR_STATE *const state = err_get_state();
if (state == NULL || state->bottom == state->top) {
return 0;
}
state->errors[state->top].mark = 1;
return 1;
}
int ERR_pop_to_mark(void) {
ERR_STATE *const state = err_get_state();
if (state == NULL) {
return 0;
}
while (state->bottom != state->top) {
struct err_error_st *error = &state->errors[state->top];
if (error->mark) {
error->mark = 0;
return 1;
}
err_clear(error);
if (state->top == 0) {
state->top = ERR_NUM_ERRORS - 1;
} else {
state->top--;
}
}
return 0;
}
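/*
 * Usage sketch (illustrative; not part of the vendored file): the mark
 * functions let a caller attempt an operation speculatively and discard the
 * errors it queued when a fallback succeeds instead.
 */
static int example_try_then_fallback(int (*primary)(void), int (*fallback)(void)) {
  ERR_set_mark();
  if (primary()) {
    return 1;
  }
  if (fallback()) {
    ERR_pop_to_mark();  // drop errors queued by the failed primary attempt
    return 1;
  }
  return 0;  // leave both attempts' errors for the caller to report
}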
void ERR_load_crypto_strings(void) {}
void ERR_free_strings(void) {}
void ERR_load_BIO_strings(void) {}
void ERR_load_ERR_strings(void) {}
void ERR_load_RAND_strings(void) {}
struct err_save_state_st {
struct err_error_st *errors;
size_t num_errors;
};
void ERR_SAVE_STATE_free(ERR_SAVE_STATE *state) {
if (state == NULL) {
return;
}
for (size_t i = 0; i < state->num_errors; i++) {
err_clear(&state->errors[i]);
}
OPENSSL_free(state->errors);
OPENSSL_free(state);
}
ERR_SAVE_STATE *ERR_save_state(void) {
ERR_STATE *const state = err_get_state();
if (state == NULL || state->top == state->bottom) {
return NULL;
}
ERR_SAVE_STATE *ret = OPENSSL_malloc(sizeof(ERR_SAVE_STATE));
if (ret == NULL) {
return NULL;
}
// Errors are stored in the range (bottom, top].
size_t num_errors = state->top >= state->bottom
? state->top - state->bottom
: ERR_NUM_ERRORS + state->top - state->bottom;
assert(num_errors < ERR_NUM_ERRORS);
ret->errors = OPENSSL_malloc(num_errors * sizeof(struct err_error_st));
if (ret->errors == NULL) {
OPENSSL_free(ret);
return NULL;
}
OPENSSL_memset(ret->errors, 0, num_errors * sizeof(struct err_error_st));
ret->num_errors = num_errors;
for (size_t i = 0; i < num_errors; i++) {
size_t j = (state->bottom + i + 1) % ERR_NUM_ERRORS;
err_copy(&ret->errors[i], &state->errors[j]);
}
return ret;
}
void ERR_restore_state(const ERR_SAVE_STATE *state) {
if (state == NULL || state->num_errors == 0) {
ERR_clear_error();
return;
}
ERR_STATE *const dst = err_get_state();
if (dst == NULL) {
return;
}
for (size_t i = 0; i < state->num_errors; i++) {
err_copy(&dst->errors[i], &state->errors[i]);
}
dst->top = state->num_errors - 1;
dst->bottom = ERR_NUM_ERRORS - 1;
}
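/*
 * Usage sketch for the save/restore helpers above (illustrative; not part
 * of the vendored file): snapshot the queue, run code that may clobber it,
 * then put the snapshot back.
 */
static void example_save_restore(void) {
  ERR_SAVE_STATE *saved = ERR_save_state();  // NULL when the queue is empty
  ERR_clear_error();                         // ...code that disturbs the queue...
  ERR_restore_state(saved);                  // NULL simply clears the queue
  ERR_SAVE_STATE_free(saved);
}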

File diff suppressed because it is too large

View File

@ -0,0 +1,58 @@
/* Copyright (c) 2017, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#ifndef OPENSSL_HEADER_CRYPTO_ERR_INTERNAL_H
#define OPENSSL_HEADER_CRYPTO_ERR_INTERNAL_H
#include <CBigNumBoringSSL_err.h>
#if defined(__cplusplus)
extern "C" {
#endif
// Private error queue functions.
// ERR_SAVE_STATE contains a saved representation of the error queue. It is
// slightly more compact than |ERR_STATE| as the error queue will typically not
// contain |ERR_NUM_ERRORS| entries.
typedef struct err_save_state_st ERR_SAVE_STATE;
// ERR_SAVE_STATE_free releases all memory associated with |state|.
OPENSSL_EXPORT void ERR_SAVE_STATE_free(ERR_SAVE_STATE *state);
// ERR_save_state returns a newly-allocated |ERR_SAVE_STATE| structure
// containing the current state of the error queue or NULL on allocation
// error. It should be released with |ERR_SAVE_STATE_free|.
OPENSSL_EXPORT ERR_SAVE_STATE *ERR_save_state(void);
// ERR_restore_state clears the error queue and replaces it with |state|.
OPENSSL_EXPORT void ERR_restore_state(const ERR_SAVE_STATE *state);
#if defined(__cplusplus)
} // extern C
extern "C++" {
BSSL_NAMESPACE_BEGIN
BORINGSSL_MAKE_DELETER(ERR_SAVE_STATE, ERR_SAVE_STATE_free)
BSSL_NAMESPACE_END
} // extern C++
#endif
#endif // OPENSSL_HEADER_CRYPTO_ERR_INTERNAL_H

View File

@ -0,0 +1,261 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/
/* ====================================================================
* Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_ex_data.h>
#include <assert.h>
#include <string.h>
#include <CBigNumBoringSSL_crypto.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_stack.h>
#include <CBigNumBoringSSL_thread.h>
#include "internal.h"
DEFINE_STACK_OF(CRYPTO_EX_DATA_FUNCS)
struct crypto_ex_data_func_st {
long argl; // Arbitrary long
void *argp; // Arbitrary void pointer
CRYPTO_EX_free *free_func;
};
int CRYPTO_get_ex_new_index(CRYPTO_EX_DATA_CLASS *ex_data_class, int *out_index,
long argl, void *argp, CRYPTO_EX_free *free_func) {
CRYPTO_EX_DATA_FUNCS *funcs;
int ret = 0;
funcs = OPENSSL_malloc(sizeof(CRYPTO_EX_DATA_FUNCS));
if (funcs == NULL) {
OPENSSL_PUT_ERROR(CRYPTO, ERR_R_MALLOC_FAILURE);
return 0;
}
funcs->argl = argl;
funcs->argp = argp;
funcs->free_func = free_func;
CRYPTO_STATIC_MUTEX_lock_write(&ex_data_class->lock);
if (ex_data_class->meth == NULL) {
ex_data_class->meth = sk_CRYPTO_EX_DATA_FUNCS_new_null();
}
if (ex_data_class->meth == NULL ||
!sk_CRYPTO_EX_DATA_FUNCS_push(ex_data_class->meth, funcs)) {
OPENSSL_PUT_ERROR(CRYPTO, ERR_R_MALLOC_FAILURE);
OPENSSL_free(funcs);
goto err;
}
*out_index = sk_CRYPTO_EX_DATA_FUNCS_num(ex_data_class->meth) - 1 +
ex_data_class->num_reserved;
ret = 1;
err:
CRYPTO_STATIC_MUTEX_unlock_write(&ex_data_class->lock);
return ret;
}
int CRYPTO_set_ex_data(CRYPTO_EX_DATA *ad, int index, void *val) {
int n, i;
if (ad->sk == NULL) {
ad->sk = sk_void_new_null();
if (ad->sk == NULL) {
OPENSSL_PUT_ERROR(CRYPTO, ERR_R_MALLOC_FAILURE);
return 0;
}
}
n = sk_void_num(ad->sk);
// Add NULL values until the stack is long enough.
for (i = n; i <= index; i++) {
if (!sk_void_push(ad->sk, NULL)) {
OPENSSL_PUT_ERROR(CRYPTO, ERR_R_MALLOC_FAILURE);
return 0;
}
}
sk_void_set(ad->sk, index, val);
return 1;
}
void *CRYPTO_get_ex_data(const CRYPTO_EX_DATA *ad, int idx) {
if (ad->sk == NULL || idx < 0 || (size_t)idx >= sk_void_num(ad->sk)) {
return NULL;
}
return sk_void_value(ad->sk, idx);
}
// get_func_pointers takes a copy of the CRYPTO_EX_DATA_FUNCS pointers, if any,
// for the given class. If there are some pointers, it sets |*out| to point to
// a fresh stack of them. Otherwise it sets |*out| to NULL. It returns one on
// success or zero on error.
static int get_func_pointers(STACK_OF(CRYPTO_EX_DATA_FUNCS) **out,
CRYPTO_EX_DATA_CLASS *ex_data_class) {
size_t n;
*out = NULL;
// CRYPTO_EX_DATA_FUNCS structures are static once set, so we can take a
// shallow copy of the list under lock and then use the structures without
// the lock held.
CRYPTO_STATIC_MUTEX_lock_read(&ex_data_class->lock);
n = sk_CRYPTO_EX_DATA_FUNCS_num(ex_data_class->meth);
if (n > 0) {
*out = sk_CRYPTO_EX_DATA_FUNCS_dup(ex_data_class->meth);
}
CRYPTO_STATIC_MUTEX_unlock_read(&ex_data_class->lock);
if (n > 0 && *out == NULL) {
OPENSSL_PUT_ERROR(CRYPTO, ERR_R_MALLOC_FAILURE);
return 0;
}
return 1;
}
void CRYPTO_new_ex_data(CRYPTO_EX_DATA *ad) {
ad->sk = NULL;
}
void CRYPTO_free_ex_data(CRYPTO_EX_DATA_CLASS *ex_data_class, void *obj,
CRYPTO_EX_DATA *ad) {
if (ad->sk == NULL) {
// Nothing to do.
return;
}
STACK_OF(CRYPTO_EX_DATA_FUNCS) *func_pointers;
if (!get_func_pointers(&func_pointers, ex_data_class)) {
// TODO(davidben): This leaks memory on malloc error.
return;
}
for (size_t i = 0; i < sk_CRYPTO_EX_DATA_FUNCS_num(func_pointers); i++) {
CRYPTO_EX_DATA_FUNCS *func_pointer =
sk_CRYPTO_EX_DATA_FUNCS_value(func_pointers, i);
if (func_pointer->free_func) {
void *ptr = CRYPTO_get_ex_data(ad, i + ex_data_class->num_reserved);
func_pointer->free_func(obj, ptr, ad, i + ex_data_class->num_reserved,
func_pointer->argl, func_pointer->argp);
}
}
sk_CRYPTO_EX_DATA_FUNCS_free(func_pointers);
sk_void_free(ad->sk);
ad->sk = NULL;
}
void CRYPTO_cleanup_all_ex_data(void) {}
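/*
 * Usage sketch (illustrative; the CRYPTO_EX_DATA_CLASS instance and the
 * wrapper function are assumptions, not part of the vendored file):
 * register an index once, then attach and read back a pointer.
 */
static int g_example_ex_index = -1;

static int example_ex_data(CRYPTO_EX_DATA_CLASS *ex_data_class,
                           CRYPTO_EX_DATA *ad, void *value) {
  if (g_example_ex_index < 0 &&
      !CRYPTO_get_ex_new_index(ex_data_class, &g_example_ex_index, 0, NULL,
                               NULL)) {
    return 0;
  }
  if (!CRYPTO_set_ex_data(ad, g_example_ex_index, value)) {
    return 0;
  }
  return CRYPTO_get_ex_data(ad, g_example_ex_index) == value;
}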

View File

@ -0,0 +1,108 @@
/* ====================================================================
* Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <CBigNumBoringSSL_aes.h>
#include <assert.h>
#include <CBigNumBoringSSL_cpu.h>
#include "internal.h"
#include "../modes/internal.h"
// Be aware that different sets of AES functions use incompatible key
// representations, varying in format of the key schedule, the |AES_KEY.rounds|
// value, or both. Therefore they cannot mix. Also, on AArch64, the plain-C
// code, above, is incompatible with the |aes_hw_*| functions.
void AES_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
if (hwaes_capable()) {
aes_hw_encrypt(in, out, key);
} else if (vpaes_capable()) {
vpaes_encrypt(in, out, key);
} else {
aes_nohw_encrypt(in, out, key);
}
}
void AES_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key) {
if (hwaes_capable()) {
aes_hw_decrypt(in, out, key);
} else if (vpaes_capable()) {
vpaes_decrypt(in, out, key);
} else {
aes_nohw_decrypt(in, out, key);
}
}
int AES_set_encrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey) {
if (bits != 128 && bits != 192 && bits != 256) {
return -2;
}
if (hwaes_capable()) {
return aes_hw_set_encrypt_key(key, bits, aeskey);
} else if (vpaes_capable()) {
return vpaes_set_encrypt_key(key, bits, aeskey);
} else {
return aes_nohw_set_encrypt_key(key, bits, aeskey);
}
}
int AES_set_decrypt_key(const uint8_t *key, unsigned bits, AES_KEY *aeskey) {
if (bits != 128 && bits != 192 && bits != 256) {
return -2;
}
if (hwaes_capable()) {
return aes_hw_set_decrypt_key(key, bits, aeskey);
} else if (vpaes_capable()) {
return vpaes_set_decrypt_key(key, bits, aeskey);
} else {
return aes_nohw_set_decrypt_key(key, bits, aeskey);
}
}
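/*
 * Usage sketch (illustrative; key and plaintext are placeholder zero
 * values): the dispatch above selects the hardware, vector-permutation or
 * portable implementation behind the same public AES_* entry points.
 */
static void example_aes_encrypt_block(void) {
  static const uint8_t key_bytes[16] = {0};  // demo 128-bit key only
  uint8_t in[16] = {0}, out[16];
  AES_KEY key;
  if (AES_set_encrypt_key(key_bytes, 128, &key) != 0) {
    return;  // only 128-, 192- or 256-bit keys are accepted
  }
  AES_encrypt(in, out, &key);
}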

File diff suppressed because it is too large

View File

@ -0,0 +1,238 @@
/* Copyright (c) 2017, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#ifndef OPENSSL_HEADER_AES_INTERNAL_H
#define OPENSSL_HEADER_AES_INTERNAL_H
#include <stdlib.h>
#include <CBigNumBoringSSL_cpu.h>
#if defined(__cplusplus)
extern "C" {
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define HWAES
#define HWAES_ECB
OPENSSL_INLINE int hwaes_capable(void) {
return (OPENSSL_ia32cap_get()[1] & (1 << (57 - 32))) != 0;
}
#define VPAES
#if defined(OPENSSL_X86_64)
#define VPAES_CTR32
#endif
#define VPAES_CBC
OPENSSL_INLINE int vpaes_capable(void) {
return (OPENSSL_ia32cap_get()[1] & (1 << (41 - 32))) != 0;
}
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#define HWAES
OPENSSL_INLINE int hwaes_capable(void) { return CRYPTO_is_ARMv8_AES_capable(); }
#if defined(OPENSSL_ARM)
#define BSAES
#define VPAES
#define VPAES_CTR32
OPENSSL_INLINE int bsaes_capable(void) { return CRYPTO_is_NEON_capable(); }
OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_NEON_capable(); }
#endif
#if defined(OPENSSL_AARCH64)
#define VPAES
#define VPAES_CBC
#define VPAES_CTR32
OPENSSL_INLINE int vpaes_capable(void) { return CRYPTO_is_NEON_capable(); }
#endif
#elif defined(OPENSSL_PPC64LE)
#define HWAES
OPENSSL_INLINE int hwaes_capable(void) {
return CRYPTO_is_PPC64LE_vcrypto_capable();
}
#endif
#endif // !NO_ASM
#if defined(HWAES)
int aes_hw_set_encrypt_key(const uint8_t *user_key, const int bits,
AES_KEY *key);
int aes_hw_set_decrypt_key(const uint8_t *user_key, const int bits,
AES_KEY *key);
void aes_hw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void aes_hw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void aes_hw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, uint8_t *ivec, const int enc);
void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, const uint8_t ivec[16]);
#else
// If HWAES isn't defined then we provide dummy functions for each of the hwaes
// functions.
OPENSSL_INLINE int hwaes_capable(void) { return 0; }
OPENSSL_INLINE int aes_hw_set_encrypt_key(const uint8_t *user_key, int bits,
AES_KEY *key) {
abort();
}
OPENSSL_INLINE int aes_hw_set_decrypt_key(const uint8_t *user_key, int bits,
AES_KEY *key) {
abort();
}
OPENSSL_INLINE void aes_hw_encrypt(const uint8_t *in, uint8_t *out,
const AES_KEY *key) {
abort();
}
OPENSSL_INLINE void aes_hw_decrypt(const uint8_t *in, uint8_t *out,
const AES_KEY *key) {
abort();
}
OPENSSL_INLINE void aes_hw_cbc_encrypt(const uint8_t *in, uint8_t *out,
size_t length, const AES_KEY *key,
uint8_t *ivec, int enc) {
abort();
}
OPENSSL_INLINE void aes_hw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
size_t len, const AES_KEY *key,
const uint8_t ivec[16]) {
abort();
}
#endif // !HWAES
#if defined(HWAES_ECB)
void aes_hw_ecb_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, const int enc);
#endif // HWAES_ECB
#if defined(BSAES)
// Note |bsaes_cbc_encrypt| requires |enc| to be zero.
void bsaes_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, uint8_t ivec[16], int enc);
void bsaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, const uint8_t ivec[16]);
// VPAES to BSAES conversions are available on all BSAES platforms.
void vpaes_encrypt_key_to_bsaes(AES_KEY *out_bsaes, const AES_KEY *vpaes);
void vpaes_decrypt_key_to_bsaes(AES_KEY *out_bsaes, const AES_KEY *vpaes);
#else
OPENSSL_INLINE char bsaes_capable(void) { return 0; }
// On other platforms, bsaes_capable() will always return false and so the
// following will never be called.
OPENSSL_INLINE void bsaes_cbc_encrypt(const uint8_t *in, uint8_t *out,
size_t length, const AES_KEY *key,
uint8_t ivec[16], int enc) {
abort();
}
OPENSSL_INLINE void bsaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
size_t len, const AES_KEY *key,
const uint8_t ivec[16]) {
abort();
}
OPENSSL_INLINE void vpaes_encrypt_key_to_bsaes(AES_KEY *out_bsaes,
const AES_KEY *vpaes) {
abort();
}
OPENSSL_INLINE void vpaes_decrypt_key_to_bsaes(AES_KEY *out_bsaes,
const AES_KEY *vpaes) {
abort();
}
#endif // !BSAES
#if defined(VPAES)
// On platforms where VPAES gets defined (just above), then these functions are
// provided by asm.
int vpaes_set_encrypt_key(const uint8_t *userKey, int bits, AES_KEY *key);
int vpaes_set_decrypt_key(const uint8_t *userKey, int bits, AES_KEY *key);
void vpaes_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void vpaes_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
#if defined(VPAES_CBC)
void vpaes_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, uint8_t *ivec, int enc);
#endif
#if defined(VPAES_CTR32)
void vpaes_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, const uint8_t ivec[16]);
#endif
#else
OPENSSL_INLINE char vpaes_capable(void) { return 0; }
// On other platforms, vpaes_capable() will always return false and so the
// following will never be called.
OPENSSL_INLINE int vpaes_set_encrypt_key(const uint8_t *userKey, int bits,
AES_KEY *key) {
abort();
}
OPENSSL_INLINE int vpaes_set_decrypt_key(const uint8_t *userKey, int bits,
AES_KEY *key) {
abort();
}
OPENSSL_INLINE void vpaes_encrypt(const uint8_t *in, uint8_t *out,
const AES_KEY *key) {
abort();
}
OPENSSL_INLINE void vpaes_decrypt(const uint8_t *in, uint8_t *out,
const AES_KEY *key) {
abort();
}
OPENSSL_INLINE void vpaes_cbc_encrypt(const uint8_t *in, uint8_t *out,
size_t length, const AES_KEY *key,
uint8_t *ivec, int enc) {
abort();
}
#endif // !VPAES
int aes_nohw_set_encrypt_key(const uint8_t *key, unsigned bits,
AES_KEY *aeskey);
int aes_nohw_set_decrypt_key(const uint8_t *key, unsigned bits,
AES_KEY *aeskey);
void aes_nohw_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void aes_nohw_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key);
void aes_nohw_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
size_t blocks, const AES_KEY *key,
const uint8_t ivec[16]);
void aes_nohw_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t *ivec, const int enc);
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_AES_INTERNAL_H

View File

@@ -0,0 +1,236 @@
/* ====================================================================
* Copyright (c) 2001-2011 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <CBigNumBoringSSL_aes.h>
#include <assert.h>
#include <limits.h>
#include <string.h>
#include <CBigNumBoringSSL_mem.h>
#include "../../internal.h"
// kDefaultIV is the default IV value given in RFC 3394, 2.2.3.1.
static const uint8_t kDefaultIV[] = {
0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6, 0xa6,
};
static const unsigned kBound = 6;
int AES_wrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
const uint8_t *in, size_t in_len) {
// See RFC 3394, section 2.2.1. Additionally, note that section 2 requires the
// plaintext be at least two 8-byte blocks.
if (in_len > INT_MAX - 8 || in_len < 16 || in_len % 8 != 0) {
return -1;
}
if (iv == NULL) {
iv = kDefaultIV;
}
OPENSSL_memmove(out + 8, in, in_len);
uint8_t A[AES_BLOCK_SIZE];
OPENSSL_memcpy(A, iv, 8);
size_t n = in_len / 8;
for (unsigned j = 0; j < kBound; j++) {
for (size_t i = 1; i <= n; i++) {
OPENSSL_memcpy(A + 8, out + 8 * i, 8);
AES_encrypt(A, A, key);
uint32_t t = (uint32_t)(n * j + i);
A[7] ^= t & 0xff;
A[6] ^= (t >> 8) & 0xff;
A[5] ^= (t >> 16) & 0xff;
A[4] ^= (t >> 24) & 0xff;
OPENSSL_memcpy(out + 8 * i, A + 8, 8);
}
}
OPENSSL_memcpy(out, A, 8);
return (int)in_len + 8;
}
// aes_unwrap_key_inner performs steps one and two from
// https://tools.ietf.org/html/rfc3394#section-2.2.2
static int aes_unwrap_key_inner(const AES_KEY *key, uint8_t *out,
uint8_t out_iv[8], const uint8_t *in,
size_t in_len) {
// See RFC 3394, section 2.2.2. Additionally, note that section 2 requires the
// plaintext be at least two 8-byte blocks, so the ciphertext must be at least
// three blocks.
if (in_len > INT_MAX || in_len < 24 || in_len % 8 != 0) {
return 0;
}
uint8_t A[AES_BLOCK_SIZE];
OPENSSL_memcpy(A, in, 8);
OPENSSL_memmove(out, in + 8, in_len - 8);
size_t n = (in_len / 8) - 1;
for (unsigned j = kBound - 1; j < kBound; j--) {
for (size_t i = n; i > 0; i--) {
uint32_t t = (uint32_t)(n * j + i);
A[7] ^= t & 0xff;
A[6] ^= (t >> 8) & 0xff;
A[5] ^= (t >> 16) & 0xff;
A[4] ^= (t >> 24) & 0xff;
OPENSSL_memcpy(A + 8, out + 8 * (i - 1), 8);
AES_decrypt(A, A, key);
OPENSSL_memcpy(out + 8 * (i - 1), A + 8, 8);
}
}
memcpy(out_iv, A, 8);
return 1;
}
int AES_unwrap_key(const AES_KEY *key, const uint8_t *iv, uint8_t *out,
const uint8_t *in, size_t in_len) {
uint8_t calculated_iv[8];
if (!aes_unwrap_key_inner(key, out, calculated_iv, in, in_len)) {
return -1;
}
if (iv == NULL) {
iv = kDefaultIV;
}
if (CRYPTO_memcmp(calculated_iv, iv, 8) != 0) {
return -1;
}
return (int)in_len - 8;
}
// kPaddingConstant is used in Key Wrap with Padding. See
// https://tools.ietf.org/html/rfc5649#section-3
static const uint8_t kPaddingConstant[4] = {0xa6, 0x59, 0x59, 0xa6};
int AES_wrap_key_padded(const AES_KEY *key, uint8_t *out, size_t *out_len,
size_t max_out, const uint8_t *in, size_t in_len) {
// See https://tools.ietf.org/html/rfc5649#section-4.1
const uint32_t in_len32_be = CRYPTO_bswap4(in_len);
const uint64_t in_len64 = in_len;
const size_t padded_len = (in_len + 7) & ~7;
*out_len = 0;
if (in_len == 0 || in_len64 > 0xffffffffu || in_len + 7 < in_len ||
padded_len + 8 < padded_len || max_out < padded_len + 8) {
return 0;
}
uint8_t block[AES_BLOCK_SIZE];
memcpy(block, kPaddingConstant, sizeof(kPaddingConstant));
memcpy(block + 4, &in_len32_be, sizeof(in_len32_be));
if (in_len <= 8) {
memset(block + 8, 0, 8);
memcpy(block + 8, in, in_len);
AES_encrypt(block, out, key);
*out_len = AES_BLOCK_SIZE;
return 1;
}
uint8_t *padded_in = OPENSSL_malloc(padded_len);
if (padded_in == NULL) {
return 0;
}
assert(padded_len >= 8);
memset(padded_in + padded_len - 8, 0, 8);
memcpy(padded_in, in, in_len);
const int ret = AES_wrap_key(key, block, out, padded_in, padded_len);
OPENSSL_free(padded_in);
if (ret < 0) {
return 0;
}
*out_len = ret;
return 1;
}
int AES_unwrap_key_padded(const AES_KEY *key, uint8_t *out, size_t *out_len,
size_t max_out, const uint8_t *in, size_t in_len) {
*out_len = 0;
if (in_len < AES_BLOCK_SIZE || max_out < in_len - 8) {
return 0;
}
uint8_t iv[8];
if (in_len == AES_BLOCK_SIZE) {
uint8_t block[AES_BLOCK_SIZE];
AES_decrypt(in, block, key);
memcpy(iv, block, sizeof(iv));
memcpy(out, block + 8, 8);
} else if (!aes_unwrap_key_inner(key, out, iv, in, in_len)) {
return 0;
}
assert(in_len % 8 == 0);
crypto_word_t ok = constant_time_eq_int(
CRYPTO_memcmp(iv, kPaddingConstant, sizeof(kPaddingConstant)), 0);
uint32_t claimed_len32;
memcpy(&claimed_len32, iv + 4, sizeof(claimed_len32));
const size_t claimed_len = CRYPTO_bswap4(claimed_len32);
ok &= ~constant_time_is_zero_w(claimed_len);
ok &= constant_time_eq_w((claimed_len - 1) >> 3, (in_len - 9) >> 3);
// Check that padding bytes are all zero.
for (size_t i = in_len - 15; i < in_len - 8; i++) {
ok &= constant_time_is_zero_w(constant_time_ge_8(i, claimed_len) & out[i]);
}
*out_len = constant_time_select_w(ok, claimed_len, 0);
return ok & 1;
}
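
AES_wrap_key and AES_unwrap_key implement RFC 3394 key wrap (the input must be a multiple of 8 bytes and at least 16 bytes long, and the output is the input length plus 8), while the *_padded variants add the RFC 5649 padding scheme for arbitrary lengths. A hedged round-trip sketch against these public entry points; the 32-byte key-encryption key and the 16-byte secret are made-up placeholders:

#include <CBigNumBoringSSL_aes.h>
#include <stdint.h>

// Wrap a 16-byte key under a 256-bit KEK using the default RFC 3394 IV,
// then unwrap it again. Wrapping uses an encryption key schedule,
// unwrapping a decryption key schedule.
static int wrap_round_trip(const uint8_t kek_bytes[32],
                           const uint8_t secret[16]) {
  AES_KEY wrap_key, unwrap_key;
  uint8_t wrapped[16 + 8];
  uint8_t unwrapped[16];

  if (AES_set_encrypt_key(kek_bytes, 256, &wrap_key) != 0 ||
      AES_set_decrypt_key(kek_bytes, 256, &unwrap_key) != 0) {
    return 0;
  }
  // A NULL IV selects kDefaultIV (0xa6 repeated); returns the wrapped
  // length on success or -1 on error.
  if (AES_wrap_key(&wrap_key, NULL, wrapped, secret, 16) != 16 + 8) {
    return 0;
  }
  // Unwrapping verifies the IV internally; returns the plaintext length or -1.
  return AES_unwrap_key(&unwrap_key, NULL, unwrapped, wrapped,
                        sizeof(wrapped)) == 16;
}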

View File

@@ -0,0 +1,106 @@
/* ====================================================================
* Copyright (c) 2002-2006 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <CBigNumBoringSSL_aes.h>
#include <assert.h>
#include "../aes/internal.h"
#include "../modes/internal.h"
void AES_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[AES_BLOCK_SIZE],
uint8_t ecount_buf[AES_BLOCK_SIZE], unsigned int *num) {
CRYPTO_ctr128_encrypt(in, out, len, key, ivec, ecount_buf, num, AES_encrypt);
}
void AES_ecb_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key,
const int enc) {
assert(in && out && key);
assert((AES_ENCRYPT == enc) || (AES_DECRYPT == enc));
if (AES_ENCRYPT == enc) {
AES_encrypt(in, out, key);
} else {
AES_decrypt(in, out, key);
}
}
void AES_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t *ivec, const int enc) {
if (hwaes_capable()) {
aes_hw_cbc_encrypt(in, out, len, key, ivec, enc);
return;
}
if (!vpaes_capable()) {
aes_nohw_cbc_encrypt(in, out, len, key, ivec, enc);
return;
}
if (enc) {
CRYPTO_cbc128_encrypt(in, out, len, key, ivec, AES_encrypt);
} else {
CRYPTO_cbc128_decrypt(in, out, len, key, ivec, AES_decrypt);
}
}
void AES_ofb128_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, uint8_t *ivec, int *num) {
unsigned num_u = (unsigned)(*num);
CRYPTO_ofb128_encrypt(in, out, length, key, ivec, &num_u, AES_encrypt);
*num = (int)num_u;
}
void AES_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t length,
const AES_KEY *key, uint8_t *ivec, int *num,
int enc) {
unsigned num_u = (unsigned)(*num);
CRYPTO_cfb128_encrypt(in, out, length, key, ivec, &num_u, enc, AES_encrypt);
*num = (int)num_u;
}
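
These wrappers expose the classic one-shot CBC/CTR/OFB/CFB modes on top of the block dispatch above; AES_cbc_encrypt takes the total byte length and writes the last ciphertext block back into the IV buffer. A small sketch assuming the caller already holds a buffer whose length is a multiple of AES_BLOCK_SIZE (padding is left to the caller here); the key and IV values are illustrative only:

#include <CBigNumBoringSSL_aes.h>
#include <stdint.h>
#include <string.h>

// CBC-encrypt `len` bytes with AES-128. The IV copy is overwritten with the
// last ciphertext block, so it could be reused to continue the stream.
static int cbc_encrypt_buffer(const uint8_t key_bytes[16],
                              const uint8_t iv_in[AES_BLOCK_SIZE],
                              const uint8_t *in, uint8_t *out, size_t len) {
  AES_KEY key;
  uint8_t iv[AES_BLOCK_SIZE];

  if (len % AES_BLOCK_SIZE != 0 ||
      AES_set_encrypt_key(key_bytes, 128, &key) != 0) {
    return 0;
  }
  memcpy(iv, iv_in, sizeof(iv));  // AES_cbc_encrypt mutates the IV in place
  AES_cbc_encrypt(in, out, len, &key, iv, AES_ENCRYPT);
  return 1;
}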

View File

@@ -0,0 +1,859 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.type _aesni_ctr32_ghash_6x,@function
.align 32
_aesni_ctr32_ghash_6x:
.cfi_startproc
vmovdqu 32(%r11),%xmm2
subq $6,%rdx
vpxor %xmm4,%xmm4,%xmm4
vmovdqu 0-128(%rcx),%xmm15
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpaddb %xmm2,%xmm11,%xmm12
vpaddb %xmm2,%xmm12,%xmm13
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm15,%xmm1,%xmm9
vmovdqu %xmm4,16+8(%rsp)
jmp .Loop6x
.align 32
.Loop6x:
addl $100663296,%ebx
jc .Lhandle_ctr32
vmovdqu 0-32(%r9),%xmm3
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm15,%xmm10,%xmm10
vpxor %xmm15,%xmm11,%xmm11
.Lresume_ctr32:
vmovdqu %xmm1,(%r8)
vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
vpxor %xmm15,%xmm12,%xmm12
vmovups 16-128(%rcx),%xmm2
vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
xorq %r12,%r12
cmpq %r14,%r15
vaesenc %xmm2,%xmm9,%xmm9
vmovdqu 48+8(%rsp),%xmm0
vpxor %xmm15,%xmm13,%xmm13
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
vaesenc %xmm2,%xmm10,%xmm10
vpxor %xmm15,%xmm14,%xmm14
setnc %r12b
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vaesenc %xmm2,%xmm11,%xmm11
vmovdqu 16-32(%r9),%xmm3
negq %r12
vaesenc %xmm2,%xmm12,%xmm12
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
vpxor %xmm4,%xmm8,%xmm8
vaesenc %xmm2,%xmm13,%xmm13
vpxor %xmm5,%xmm1,%xmm4
andq $0x60,%r12
vmovups 32-128(%rcx),%xmm15
vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
vaesenc %xmm2,%xmm14,%xmm14
vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
leaq (%r14,%r12,1),%r14
vaesenc %xmm15,%xmm9,%xmm9
vpxor 16+8(%rsp),%xmm8,%xmm8
vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
vmovdqu 64+8(%rsp),%xmm0
vaesenc %xmm15,%xmm10,%xmm10
movbeq 88(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 80(%r14),%r12
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,32+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,40+8(%rsp)
vmovdqu 48-32(%r9),%xmm5
vaesenc %xmm15,%xmm14,%xmm14
vmovups 48-128(%rcx),%xmm15
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm3,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
vaesenc %xmm15,%xmm11,%xmm11
vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
vmovdqu 80+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vpxor %xmm1,%xmm4,%xmm4
vmovdqu 64-32(%r9),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vmovups 64-128(%rcx),%xmm15
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
vaesenc %xmm15,%xmm10,%xmm10
movbeq 72(%r14),%r13
vpxor %xmm5,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
movbeq 64(%r14),%r12
vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
vmovdqu 96+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,48+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,56+8(%rsp)
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 96-32(%r9),%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vmovups 80-128(%rcx),%xmm15
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
vaesenc %xmm15,%xmm10,%xmm10
movbeq 56(%r14),%r13
vpxor %xmm1,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
vpxor 112+8(%rsp),%xmm8,%xmm8
vaesenc %xmm15,%xmm11,%xmm11
movbeq 48(%r14),%r12
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,64+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,72+8(%rsp)
vpxor %xmm3,%xmm4,%xmm4
vmovdqu 112-32(%r9),%xmm3
vaesenc %xmm15,%xmm14,%xmm14
vmovups 96-128(%rcx),%xmm15
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
vaesenc %xmm15,%xmm10,%xmm10
movbeq 40(%r14),%r13
vpxor %xmm2,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
movbeq 32(%r14),%r12
vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,80+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,88+8(%rsp)
vpxor %xmm5,%xmm6,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor %xmm1,%xmm6,%xmm6
vmovups 112-128(%rcx),%xmm15
vpslldq $8,%xmm6,%xmm5
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 16(%r11),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm8,%xmm7,%xmm7
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm5,%xmm4,%xmm4
movbeq 24(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 16(%r14),%r12
vpalignr $8,%xmm4,%xmm4,%xmm0
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
movq %r13,96+8(%rsp)
vaesenc %xmm15,%xmm12,%xmm12
movq %r12,104+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
vmovups 128-128(%rcx),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vmovups 144-128(%rcx),%xmm15
vaesenc %xmm1,%xmm10,%xmm10
vpsrldq $8,%xmm6,%xmm6
vaesenc %xmm1,%xmm11,%xmm11
vpxor %xmm6,%xmm7,%xmm7
vaesenc %xmm1,%xmm12,%xmm12
vpxor %xmm0,%xmm4,%xmm4
movbeq 8(%r14),%r13
vaesenc %xmm1,%xmm13,%xmm13
movbeq 0(%r14),%r12
vaesenc %xmm1,%xmm14,%xmm14
vmovups 160-128(%rcx),%xmm1
cmpl $11,%ebp
jb .Lenc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 176-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 192-128(%rcx),%xmm1
je .Lenc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 208-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 224-128(%rcx),%xmm1
jmp .Lenc_tail
.align 32
.Lhandle_ctr32:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vmovdqu 0-32(%r9),%xmm3
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm15,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm15,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpshufb %xmm0,%xmm14,%xmm14
vpshufb %xmm0,%xmm1,%xmm1
jmp .Lresume_ctr32
.align 32
.Lenc_tail:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
vpalignr $8,%xmm4,%xmm4,%xmm8
vaesenc %xmm15,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
vpxor 0(%rdi),%xmm1,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
vpxor 16(%rdi),%xmm1,%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vpxor 32(%rdi),%xmm1,%xmm5
vaesenc %xmm15,%xmm13,%xmm13
vpxor 48(%rdi),%xmm1,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor 64(%rdi),%xmm1,%xmm7
vpxor 80(%rdi),%xmm1,%xmm3
vmovdqu (%r8),%xmm1
vaesenclast %xmm2,%xmm9,%xmm9
vmovdqu 32(%r11),%xmm2
vaesenclast %xmm0,%xmm10,%xmm10
vpaddb %xmm2,%xmm1,%xmm0
movq %r13,112+8(%rsp)
leaq 96(%rdi),%rdi
vaesenclast %xmm5,%xmm11,%xmm11
vpaddb %xmm2,%xmm0,%xmm5
movq %r12,120+8(%rsp)
leaq 96(%rsi),%rsi
vmovdqu 0-128(%rcx),%xmm15
vaesenclast %xmm6,%xmm12,%xmm12
vpaddb %xmm2,%xmm5,%xmm6
vaesenclast %xmm7,%xmm13,%xmm13
vpaddb %xmm2,%xmm6,%xmm7
vaesenclast %xmm3,%xmm14,%xmm14
vpaddb %xmm2,%xmm7,%xmm3
addq $0x60,%r10
subq $0x6,%rdx
jc .L6x_done
vmovups %xmm9,-96(%rsi)
vpxor %xmm15,%xmm1,%xmm9
vmovups %xmm10,-80(%rsi)
vmovdqa %xmm0,%xmm10
vmovups %xmm11,-64(%rsi)
vmovdqa %xmm5,%xmm11
vmovups %xmm12,-48(%rsi)
vmovdqa %xmm6,%xmm12
vmovups %xmm13,-32(%rsi)
vmovdqa %xmm7,%xmm13
vmovups %xmm14,-16(%rsi)
vmovdqa %xmm3,%xmm14
vmovdqu 32+8(%rsp),%xmm7
jmp .Loop6x
.L6x_done:
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
.byte 0xf3,0xc3
.cfi_endproc
.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x
.globl aesni_gcm_decrypt
.hidden aesni_gcm_decrypt
.type aesni_gcm_decrypt,@function
.align 32
aesni_gcm_decrypt:
.cfi_startproc
xorq %r10,%r10
cmpq $0x60,%rdx
jb .Lgcm_dec_abort
leaq (%rsp),%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq .Lbswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
vmovdqu (%r9),%xmm8
andq $-128,%rsp
vmovdqu (%r11),%xmm0
leaq 128(%rcx),%rcx
leaq 32+32(%r9),%r9
movl 240-128(%rcx),%ebp
vpshufb %xmm0,%xmm8,%xmm8
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc .Ldec_no_key_aliasing
cmpq $768,%r15
jnc .Ldec_no_key_aliasing
subq %r15,%rsp
.Ldec_no_key_aliasing:
vmovdqu 80(%rdi),%xmm7
leaq (%rdi),%r14
vmovdqu 64(%rdi),%xmm4
leaq -192(%rdi,%rdx,1),%r15
vmovdqu 48(%rdi),%xmm5
shrq $4,%rdx
xorq %r10,%r10
vmovdqu 32(%rdi),%xmm6
vpshufb %xmm0,%xmm7,%xmm7
vmovdqu 16(%rdi),%xmm2
vpshufb %xmm0,%xmm4,%xmm4
vmovdqu (%rdi),%xmm3
vpshufb %xmm0,%xmm5,%xmm5
vmovdqu %xmm4,48(%rsp)
vpshufb %xmm0,%xmm6,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm2,%xmm2
vmovdqu %xmm6,80(%rsp)
vpshufb %xmm0,%xmm3,%xmm3
vmovdqu %xmm2,96(%rsp)
vmovdqu %xmm3,112(%rsp)
call _aesni_ctr32_ghash_6x
vmovups %xmm9,-96(%rsi)
vmovups %xmm10,-80(%rsi)
vmovups %xmm11,-64(%rsi)
vmovups %xmm12,-48(%rsi)
vmovups %xmm13,-32(%rsi)
vmovups %xmm14,-16(%rsi)
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lgcm_dec_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_gcm_decrypt,.-aesni_gcm_decrypt
.type _aesni_ctr32_6x,@function
.align 32
_aesni_ctr32_6x:
.cfi_startproc
vmovdqu 0-128(%rcx),%xmm4
vmovdqu 32(%r11),%xmm2
leaq -1(%rbp),%r13
vmovups 16-128(%rcx),%xmm15
leaq 32-128(%rcx),%r12
vpxor %xmm4,%xmm1,%xmm9
addl $100663296,%ebx
jc .Lhandle_ctr32_2
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddb %xmm2,%xmm11,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddb %xmm2,%xmm12,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.align 16
.Loop_ctr32:
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vmovups (%r12),%xmm15
leaq 16(%r12),%r12
decl %r13d
jnz .Loop_ctr32
vmovdqu (%r12),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor 0(%rdi),%xmm3,%xmm4
vaesenc %xmm15,%xmm10,%xmm10
vpxor 16(%rdi),%xmm3,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
vpxor 32(%rdi),%xmm3,%xmm6
vaesenc %xmm15,%xmm12,%xmm12
vpxor 48(%rdi),%xmm3,%xmm8
vaesenc %xmm15,%xmm13,%xmm13
vpxor 64(%rdi),%xmm3,%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vpxor 80(%rdi),%xmm3,%xmm3
leaq 96(%rdi),%rdi
vaesenclast %xmm4,%xmm9,%xmm9
vaesenclast %xmm5,%xmm10,%xmm10
vaesenclast %xmm6,%xmm11,%xmm11
vaesenclast %xmm8,%xmm12,%xmm12
vaesenclast %xmm2,%xmm13,%xmm13
vaesenclast %xmm3,%xmm14,%xmm14
vmovups %xmm9,0(%rsi)
vmovups %xmm10,16(%rsi)
vmovups %xmm11,32(%rsi)
vmovups %xmm12,48(%rsi)
vmovups %xmm13,64(%rsi)
vmovups %xmm14,80(%rsi)
leaq 96(%rsi),%rsi
.byte 0xf3,0xc3
.align 32
.Lhandle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpshufb %xmm0,%xmm14,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpshufb %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp .Loop_ctr32
.cfi_endproc
.size _aesni_ctr32_6x,.-_aesni_ctr32_6x
.globl aesni_gcm_encrypt
.hidden aesni_gcm_encrypt
.type aesni_gcm_encrypt,@function
.align 32
aesni_gcm_encrypt:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
movb $1,BORINGSSL_function_hit+2(%rip)
#endif
xorq %r10,%r10
cmpq $288,%rdx
jb .Lgcm_enc_abort
leaq (%rsp),%rax
.cfi_def_cfa_register %rax
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq .Lbswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
leaq 128(%rcx),%rcx
vmovdqu (%r11),%xmm0
andq $-128,%rsp
movl 240-128(%rcx),%ebp
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc .Lenc_no_key_aliasing
cmpq $768,%r15
jnc .Lenc_no_key_aliasing
subq %r15,%rsp
.Lenc_no_key_aliasing:
leaq (%rsi),%r14
leaq -192(%rsi,%rdx,1),%r15
shrq $4,%rdx
call _aesni_ctr32_6x
vpshufb %xmm0,%xmm9,%xmm8
vpshufb %xmm0,%xmm10,%xmm2
vmovdqu %xmm8,112(%rsp)
vpshufb %xmm0,%xmm11,%xmm4
vmovdqu %xmm2,96(%rsp)
vpshufb %xmm0,%xmm12,%xmm5
vmovdqu %xmm4,80(%rsp)
vpshufb %xmm0,%xmm13,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm14,%xmm7
vmovdqu %xmm6,48(%rsp)
call _aesni_ctr32_6x
vmovdqu (%r9),%xmm8
leaq 32+32(%r9),%r9
subq $12,%rdx
movq $192,%r10
vpshufb %xmm0,%xmm8,%xmm8
call _aesni_ctr32_ghash_6x
vmovdqu 32(%rsp),%xmm7
vmovdqu (%r11),%xmm0
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm7,%xmm7,%xmm1
vmovdqu 32-32(%r9),%xmm15
vmovups %xmm9,-96(%rsi)
vpshufb %xmm0,%xmm9,%xmm9
vpxor %xmm7,%xmm1,%xmm1
vmovups %xmm10,-80(%rsi)
vpshufb %xmm0,%xmm10,%xmm10
vmovups %xmm11,-64(%rsi)
vpshufb %xmm0,%xmm11,%xmm11
vmovups %xmm12,-48(%rsi)
vpshufb %xmm0,%xmm12,%xmm12
vmovups %xmm13,-32(%rsi)
vpshufb %xmm0,%xmm13,%xmm13
vmovups %xmm14,-16(%rsi)
vpshufb %xmm0,%xmm14,%xmm14
vmovdqu %xmm9,16(%rsp)
vmovdqu 48(%rsp),%xmm6
vmovdqu 16-32(%r9),%xmm0
vpunpckhqdq %xmm6,%xmm6,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
vpxor %xmm6,%xmm2,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vmovdqu 64(%rsp),%xmm9
vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm9,%xmm9,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
vpxor %xmm9,%xmm5,%xmm5
vpxor %xmm7,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vmovdqu 80(%rsp),%xmm1
vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm4,%xmm7,%xmm7
vpunpckhqdq %xmm1,%xmm1,%xmm4
vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpxor %xmm6,%xmm9,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 96(%rsp),%xmm2
vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm7,%xmm6,%xmm6
vpunpckhqdq %xmm2,%xmm2,%xmm7
vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
vpxor %xmm2,%xmm7,%xmm7
vpxor %xmm9,%xmm1,%xmm1
vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm5,%xmm4,%xmm4
vpxor 112(%rsp),%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
vmovdqu 112-32(%r9),%xmm0
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpxor %xmm6,%xmm5,%xmm5
vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
vpxor %xmm4,%xmm7,%xmm4
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm1
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
vpxor %xmm14,%xmm1,%xmm1
vpxor %xmm5,%xmm6,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
vmovdqu 32-32(%r9),%xmm15
vpxor %xmm2,%xmm8,%xmm7
vpxor %xmm4,%xmm9,%xmm6
vmovdqu 16-32(%r9),%xmm0
vpxor %xmm5,%xmm7,%xmm9
vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
vpxor %xmm9,%xmm6,%xmm6
vpunpckhqdq %xmm13,%xmm13,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
vpxor %xmm13,%xmm2,%xmm2
vpslldq $8,%xmm6,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vpxor %xmm9,%xmm5,%xmm8
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm6,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm12,%xmm12,%xmm9
vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
vpxor %xmm12,%xmm9,%xmm9
vpxor %xmm14,%xmm13,%xmm13
vpalignr $8,%xmm8,%xmm8,%xmm14
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm11,%xmm11,%xmm1
vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
vpxor %xmm11,%xmm1,%xmm1
vpxor %xmm13,%xmm12,%xmm12
vxorps 16(%rsp),%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm9,%xmm9
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm10,%xmm10,%xmm2
vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
vpxor %xmm10,%xmm2,%xmm2
vpalignr $8,%xmm8,%xmm8,%xmm14
vpxor %xmm12,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm9,%xmm1,%xmm1
vxorps %xmm7,%xmm14,%xmm14
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
vmovdqu 112-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm11,%xmm10,%xmm10
vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
vpxor %xmm4,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
vpxor %xmm10,%xmm7,%xmm7
vpxor %xmm2,%xmm6,%xmm6
vpxor %xmm5,%xmm7,%xmm4
vpxor %xmm4,%xmm6,%xmm6
vpslldq $8,%xmm6,%xmm1
vmovdqu 16(%r11),%xmm3
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm1,%xmm5,%xmm8
vpxor %xmm6,%xmm7,%xmm7
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm2,%xmm8,%xmm8
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm7,%xmm2,%xmm2
vpxor %xmm2,%xmm8,%xmm8
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lgcm_enc_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_gcm_encrypt,.-aesni_gcm_encrypt
.align 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.Lpoly:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.Lone_msb:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
.Ltwo_lsb:
.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.Lone_lsb:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 64
#endif
.section .note.GNU-stack,"",@progbits
#endif // defined(__x86_64__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,857 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.p2align 5
_aesni_ctr32_ghash_6x:
vmovdqu 32(%r11),%xmm2
subq $6,%rdx
vpxor %xmm4,%xmm4,%xmm4
vmovdqu 0-128(%rcx),%xmm15
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpaddb %xmm2,%xmm11,%xmm12
vpaddb %xmm2,%xmm12,%xmm13
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm15,%xmm1,%xmm9
vmovdqu %xmm4,16+8(%rsp)
jmp L$oop6x
.p2align 5
L$oop6x:
addl $100663296,%ebx
jc L$handle_ctr32
vmovdqu 0-32(%r9),%xmm3
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm15,%xmm10,%xmm10
vpxor %xmm15,%xmm11,%xmm11
L$resume_ctr32:
vmovdqu %xmm1,(%r8)
vpclmulqdq $0x10,%xmm3,%xmm7,%xmm5
vpxor %xmm15,%xmm12,%xmm12
vmovups 16-128(%rcx),%xmm2
vpclmulqdq $0x01,%xmm3,%xmm7,%xmm6
xorq %r12,%r12
cmpq %r14,%r15
vaesenc %xmm2,%xmm9,%xmm9
vmovdqu 48+8(%rsp),%xmm0
vpxor %xmm15,%xmm13,%xmm13
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm1
vaesenc %xmm2,%xmm10,%xmm10
vpxor %xmm15,%xmm14,%xmm14
setnc %r12b
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vaesenc %xmm2,%xmm11,%xmm11
vmovdqu 16-32(%r9),%xmm3
negq %r12
vaesenc %xmm2,%xmm12,%xmm12
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm3,%xmm0,%xmm5
vpxor %xmm4,%xmm8,%xmm8
vaesenc %xmm2,%xmm13,%xmm13
vpxor %xmm5,%xmm1,%xmm4
andq $0x60,%r12
vmovups 32-128(%rcx),%xmm15
vpclmulqdq $0x10,%xmm3,%xmm0,%xmm1
vaesenc %xmm2,%xmm14,%xmm14
vpclmulqdq $0x01,%xmm3,%xmm0,%xmm2
leaq (%r14,%r12,1),%r14
vaesenc %xmm15,%xmm9,%xmm9
vpxor 16+8(%rsp),%xmm8,%xmm8
vpclmulqdq $0x11,%xmm3,%xmm0,%xmm3
vmovdqu 64+8(%rsp),%xmm0
vaesenc %xmm15,%xmm10,%xmm10
movbeq 88(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 80(%r14),%r12
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,32+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,40+8(%rsp)
vmovdqu 48-32(%r9),%xmm5
vaesenc %xmm15,%xmm14,%xmm14
vmovups 48-128(%rcx),%xmm15
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm5,%xmm0,%xmm1
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm5,%xmm0,%xmm2
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm3,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm5,%xmm0,%xmm3
vaesenc %xmm15,%xmm11,%xmm11
vpclmulqdq $0x11,%xmm5,%xmm0,%xmm5
vmovdqu 80+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vpxor %xmm1,%xmm4,%xmm4
vmovdqu 64-32(%r9),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vmovups 64-128(%rcx),%xmm15
vpxor %xmm2,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm1,%xmm0,%xmm2
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm1,%xmm0,%xmm3
vaesenc %xmm15,%xmm10,%xmm10
movbeq 72(%r14),%r13
vpxor %xmm5,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm1,%xmm0,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
movbeq 64(%r14),%r12
vpclmulqdq $0x11,%xmm1,%xmm0,%xmm1
vmovdqu 96+8(%rsp),%xmm0
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,48+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,56+8(%rsp)
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 96-32(%r9),%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vmovups 80-128(%rcx),%xmm15
vpxor %xmm3,%xmm6,%xmm6
vpclmulqdq $0x00,%xmm2,%xmm0,%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm2,%xmm0,%xmm5
vaesenc %xmm15,%xmm10,%xmm10
movbeq 56(%r14),%r13
vpxor %xmm1,%xmm7,%xmm7
vpclmulqdq $0x01,%xmm2,%xmm0,%xmm1
vpxor 112+8(%rsp),%xmm8,%xmm8
vaesenc %xmm15,%xmm11,%xmm11
movbeq 48(%r14),%r12
vpclmulqdq $0x11,%xmm2,%xmm0,%xmm2
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,64+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,72+8(%rsp)
vpxor %xmm3,%xmm4,%xmm4
vmovdqu 112-32(%r9),%xmm3
vaesenc %xmm15,%xmm14,%xmm14
vmovups 96-128(%rcx),%xmm15
vpxor %xmm5,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm5
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm1,%xmm6,%xmm6
vpclmulqdq $0x01,%xmm3,%xmm8,%xmm1
vaesenc %xmm15,%xmm10,%xmm10
movbeq 40(%r14),%r13
vpxor %xmm2,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm3,%xmm8,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
movbeq 32(%r14),%r12
vpclmulqdq $0x11,%xmm3,%xmm8,%xmm8
vaesenc %xmm15,%xmm12,%xmm12
movq %r13,80+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
movq %r12,88+8(%rsp)
vpxor %xmm5,%xmm6,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor %xmm1,%xmm6,%xmm6
vmovups 112-128(%rcx),%xmm15
vpslldq $8,%xmm6,%xmm5
vpxor %xmm2,%xmm4,%xmm4
vmovdqu 16(%r11),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor %xmm8,%xmm7,%xmm7
vaesenc %xmm15,%xmm10,%xmm10
vpxor %xmm5,%xmm4,%xmm4
movbeq 24(%r14),%r13
vaesenc %xmm15,%xmm11,%xmm11
movbeq 16(%r14),%r12
vpalignr $8,%xmm4,%xmm4,%xmm0
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
movq %r13,96+8(%rsp)
vaesenc %xmm15,%xmm12,%xmm12
movq %r12,104+8(%rsp)
vaesenc %xmm15,%xmm13,%xmm13
vmovups 128-128(%rcx),%xmm1
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vmovups 144-128(%rcx),%xmm15
vaesenc %xmm1,%xmm10,%xmm10
vpsrldq $8,%xmm6,%xmm6
vaesenc %xmm1,%xmm11,%xmm11
vpxor %xmm6,%xmm7,%xmm7
vaesenc %xmm1,%xmm12,%xmm12
vpxor %xmm0,%xmm4,%xmm4
movbeq 8(%r14),%r13
vaesenc %xmm1,%xmm13,%xmm13
movbeq 0(%r14),%r12
vaesenc %xmm1,%xmm14,%xmm14
vmovups 160-128(%rcx),%xmm1
cmpl $11,%ebp
jb L$enc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 176-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 192-128(%rcx),%xmm1
je L$enc_tail
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vaesenc %xmm1,%xmm9,%xmm9
vaesenc %xmm1,%xmm10,%xmm10
vaesenc %xmm1,%xmm11,%xmm11
vaesenc %xmm1,%xmm12,%xmm12
vaesenc %xmm1,%xmm13,%xmm13
vmovups 208-128(%rcx),%xmm15
vaesenc %xmm1,%xmm14,%xmm14
vmovups 224-128(%rcx),%xmm1
jmp L$enc_tail
.p2align 5
L$handle_ctr32:
vmovdqu (%r11),%xmm0
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vmovdqu 0-32(%r9),%xmm3
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm15,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm15,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpshufb %xmm0,%xmm14,%xmm14
vpshufb %xmm0,%xmm1,%xmm1
jmp L$resume_ctr32
.p2align 5
L$enc_tail:
vaesenc %xmm15,%xmm9,%xmm9
vmovdqu %xmm7,16+8(%rsp)
vpalignr $8,%xmm4,%xmm4,%xmm8
vaesenc %xmm15,%xmm10,%xmm10
vpclmulqdq $0x10,%xmm3,%xmm4,%xmm4
vpxor 0(%rdi),%xmm1,%xmm2
vaesenc %xmm15,%xmm11,%xmm11
vpxor 16(%rdi),%xmm1,%xmm0
vaesenc %xmm15,%xmm12,%xmm12
vpxor 32(%rdi),%xmm1,%xmm5
vaesenc %xmm15,%xmm13,%xmm13
vpxor 48(%rdi),%xmm1,%xmm6
vaesenc %xmm15,%xmm14,%xmm14
vpxor 64(%rdi),%xmm1,%xmm7
vpxor 80(%rdi),%xmm1,%xmm3
vmovdqu (%r8),%xmm1
vaesenclast %xmm2,%xmm9,%xmm9
vmovdqu 32(%r11),%xmm2
vaesenclast %xmm0,%xmm10,%xmm10
vpaddb %xmm2,%xmm1,%xmm0
movq %r13,112+8(%rsp)
leaq 96(%rdi),%rdi
vaesenclast %xmm5,%xmm11,%xmm11
vpaddb %xmm2,%xmm0,%xmm5
movq %r12,120+8(%rsp)
leaq 96(%rsi),%rsi
vmovdqu 0-128(%rcx),%xmm15
vaesenclast %xmm6,%xmm12,%xmm12
vpaddb %xmm2,%xmm5,%xmm6
vaesenclast %xmm7,%xmm13,%xmm13
vpaddb %xmm2,%xmm6,%xmm7
vaesenclast %xmm3,%xmm14,%xmm14
vpaddb %xmm2,%xmm7,%xmm3
addq $0x60,%r10
subq $0x6,%rdx
jc L$6x_done
vmovups %xmm9,-96(%rsi)
vpxor %xmm15,%xmm1,%xmm9
vmovups %xmm10,-80(%rsi)
vmovdqa %xmm0,%xmm10
vmovups %xmm11,-64(%rsi)
vmovdqa %xmm5,%xmm11
vmovups %xmm12,-48(%rsi)
vmovdqa %xmm6,%xmm12
vmovups %xmm13,-32(%rsi)
vmovdqa %xmm7,%xmm13
vmovups %xmm14,-16(%rsi)
vmovdqa %xmm3,%xmm14
vmovdqu 32+8(%rsp),%xmm7
jmp L$oop6x
L$6x_done:
vpxor 16+8(%rsp),%xmm8,%xmm8
vpxor %xmm4,%xmm8,%xmm8
.byte 0xf3,0xc3
.globl _aesni_gcm_decrypt
.private_extern _aesni_gcm_decrypt
.p2align 5
_aesni_gcm_decrypt:
xorq %r10,%r10
cmpq $0x60,%rdx
jb L$gcm_dec_abort
leaq (%rsp),%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq L$bswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
vmovdqu (%r9),%xmm8
andq $-128,%rsp
vmovdqu (%r11),%xmm0
leaq 128(%rcx),%rcx
leaq 32+32(%r9),%r9
movl 240-128(%rcx),%ebp
vpshufb %xmm0,%xmm8,%xmm8
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc L$dec_no_key_aliasing
cmpq $768,%r15
jnc L$dec_no_key_aliasing
subq %r15,%rsp
L$dec_no_key_aliasing:
vmovdqu 80(%rdi),%xmm7
leaq (%rdi),%r14
vmovdqu 64(%rdi),%xmm4
leaq -192(%rdi,%rdx,1),%r15
vmovdqu 48(%rdi),%xmm5
shrq $4,%rdx
xorq %r10,%r10
vmovdqu 32(%rdi),%xmm6
vpshufb %xmm0,%xmm7,%xmm7
vmovdqu 16(%rdi),%xmm2
vpshufb %xmm0,%xmm4,%xmm4
vmovdqu (%rdi),%xmm3
vpshufb %xmm0,%xmm5,%xmm5
vmovdqu %xmm4,48(%rsp)
vpshufb %xmm0,%xmm6,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm2,%xmm2
vmovdqu %xmm6,80(%rsp)
vpshufb %xmm0,%xmm3,%xmm3
vmovdqu %xmm2,96(%rsp)
vmovdqu %xmm3,112(%rsp)
call _aesni_ctr32_ghash_6x
vmovups %xmm9,-96(%rsi)
vmovups %xmm10,-80(%rsi)
vmovups %xmm11,-64(%rsi)
vmovups %xmm12,-48(%rsi)
vmovups %xmm13,-32(%rsi)
vmovups %xmm14,-16(%rsi)
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
L$gcm_dec_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.p2align 5
_aesni_ctr32_6x:
vmovdqu 0-128(%rcx),%xmm4
vmovdqu 32(%r11),%xmm2
leaq -1(%rbp),%r13
vmovups 16-128(%rcx),%xmm15
leaq 32-128(%rcx),%r12
vpxor %xmm4,%xmm1,%xmm9
addl $100663296,%ebx
jc L$handle_ctr32_2
vpaddb %xmm2,%xmm1,%xmm10
vpaddb %xmm2,%xmm10,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddb %xmm2,%xmm11,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddb %xmm2,%xmm12,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpaddb %xmm2,%xmm13,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpaddb %xmm2,%xmm14,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp L$oop_ctr32
.p2align 4
L$oop_ctr32:
vaesenc %xmm15,%xmm9,%xmm9
vaesenc %xmm15,%xmm10,%xmm10
vaesenc %xmm15,%xmm11,%xmm11
vaesenc %xmm15,%xmm12,%xmm12
vaesenc %xmm15,%xmm13,%xmm13
vaesenc %xmm15,%xmm14,%xmm14
vmovups (%r12),%xmm15
leaq 16(%r12),%r12
decl %r13d
jnz L$oop_ctr32
vmovdqu (%r12),%xmm3
vaesenc %xmm15,%xmm9,%xmm9
vpxor 0(%rdi),%xmm3,%xmm4
vaesenc %xmm15,%xmm10,%xmm10
vpxor 16(%rdi),%xmm3,%xmm5
vaesenc %xmm15,%xmm11,%xmm11
vpxor 32(%rdi),%xmm3,%xmm6
vaesenc %xmm15,%xmm12,%xmm12
vpxor 48(%rdi),%xmm3,%xmm8
vaesenc %xmm15,%xmm13,%xmm13
vpxor 64(%rdi),%xmm3,%xmm2
vaesenc %xmm15,%xmm14,%xmm14
vpxor 80(%rdi),%xmm3,%xmm3
leaq 96(%rdi),%rdi
vaesenclast %xmm4,%xmm9,%xmm9
vaesenclast %xmm5,%xmm10,%xmm10
vaesenclast %xmm6,%xmm11,%xmm11
vaesenclast %xmm8,%xmm12,%xmm12
vaesenclast %xmm2,%xmm13,%xmm13
vaesenclast %xmm3,%xmm14,%xmm14
vmovups %xmm9,0(%rsi)
vmovups %xmm10,16(%rsi)
vmovups %xmm11,32(%rsi)
vmovups %xmm12,48(%rsi)
vmovups %xmm13,64(%rsi)
vmovups %xmm14,80(%rsi)
leaq 96(%rsi),%rsi
.byte 0xf3,0xc3
.p2align 5
L$handle_ctr32_2:
vpshufb %xmm0,%xmm1,%xmm6
vmovdqu 48(%r11),%xmm5
vpaddd 64(%r11),%xmm6,%xmm10
vpaddd %xmm5,%xmm6,%xmm11
vpaddd %xmm5,%xmm10,%xmm12
vpshufb %xmm0,%xmm10,%xmm10
vpaddd %xmm5,%xmm11,%xmm13
vpshufb %xmm0,%xmm11,%xmm11
vpxor %xmm4,%xmm10,%xmm10
vpaddd %xmm5,%xmm12,%xmm14
vpshufb %xmm0,%xmm12,%xmm12
vpxor %xmm4,%xmm11,%xmm11
vpaddd %xmm5,%xmm13,%xmm1
vpshufb %xmm0,%xmm13,%xmm13
vpxor %xmm4,%xmm12,%xmm12
vpshufb %xmm0,%xmm14,%xmm14
vpxor %xmm4,%xmm13,%xmm13
vpshufb %xmm0,%xmm1,%xmm1
vpxor %xmm4,%xmm14,%xmm14
jmp L$oop_ctr32
.globl _aesni_gcm_encrypt
.private_extern _aesni_gcm_encrypt
.p2align 5
_aesni_gcm_encrypt:
#ifdef BORINGSSL_DISPATCH_TEST
movb $1,_BORINGSSL_function_hit+2(%rip)
#endif
xorq %r10,%r10
cmpq $288,%rdx
jb L$gcm_enc_abort
leaq (%rsp),%rax
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
vzeroupper
vmovdqu (%r8),%xmm1
addq $-128,%rsp
movl 12(%r8),%ebx
leaq L$bswap_mask(%rip),%r11
leaq -128(%rcx),%r14
movq $0xf80,%r15
leaq 128(%rcx),%rcx
vmovdqu (%r11),%xmm0
andq $-128,%rsp
movl 240-128(%rcx),%ebp
andq %r15,%r14
andq %rsp,%r15
subq %r14,%r15
jc L$enc_no_key_aliasing
cmpq $768,%r15
jnc L$enc_no_key_aliasing
subq %r15,%rsp
L$enc_no_key_aliasing:
leaq (%rsi),%r14
leaq -192(%rsi,%rdx,1),%r15
shrq $4,%rdx
call _aesni_ctr32_6x
vpshufb %xmm0,%xmm9,%xmm8
vpshufb %xmm0,%xmm10,%xmm2
vmovdqu %xmm8,112(%rsp)
vpshufb %xmm0,%xmm11,%xmm4
vmovdqu %xmm2,96(%rsp)
vpshufb %xmm0,%xmm12,%xmm5
vmovdqu %xmm4,80(%rsp)
vpshufb %xmm0,%xmm13,%xmm6
vmovdqu %xmm5,64(%rsp)
vpshufb %xmm0,%xmm14,%xmm7
vmovdqu %xmm6,48(%rsp)
call _aesni_ctr32_6x
vmovdqu (%r9),%xmm8
leaq 32+32(%r9),%r9
subq $12,%rdx
movq $192,%r10
vpshufb %xmm0,%xmm8,%xmm8
call _aesni_ctr32_ghash_6x
vmovdqu 32(%rsp),%xmm7
vmovdqu (%r11),%xmm0
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm7,%xmm7,%xmm1
vmovdqu 32-32(%r9),%xmm15
vmovups %xmm9,-96(%rsi)
vpshufb %xmm0,%xmm9,%xmm9
vpxor %xmm7,%xmm1,%xmm1
vmovups %xmm10,-80(%rsi)
vpshufb %xmm0,%xmm10,%xmm10
vmovups %xmm11,-64(%rsi)
vpshufb %xmm0,%xmm11,%xmm11
vmovups %xmm12,-48(%rsi)
vpshufb %xmm0,%xmm12,%xmm12
vmovups %xmm13,-32(%rsi)
vpshufb %xmm0,%xmm13,%xmm13
vmovups %xmm14,-16(%rsi)
vpshufb %xmm0,%xmm14,%xmm14
vmovdqu %xmm9,16(%rsp)
vmovdqu 48(%rsp),%xmm6
vmovdqu 16-32(%r9),%xmm0
vpunpckhqdq %xmm6,%xmm6,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm7,%xmm5
vpxor %xmm6,%xmm2,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vmovdqu 64(%rsp),%xmm9
vpclmulqdq $0x00,%xmm0,%xmm6,%xmm4
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm9,%xmm9,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm6,%xmm6
vpxor %xmm9,%xmm5,%xmm5
vpxor %xmm7,%xmm6,%xmm6
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vmovdqu 80(%rsp),%xmm1
vpclmulqdq $0x00,%xmm3,%xmm9,%xmm7
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm4,%xmm7,%xmm7
vpunpckhqdq %xmm1,%xmm1,%xmm4
vpclmulqdq $0x11,%xmm3,%xmm9,%xmm9
vpxor %xmm1,%xmm4,%xmm4
vpxor %xmm6,%xmm9,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm5,%xmm5
vpxor %xmm2,%xmm5,%xmm5
vmovdqu 96(%rsp),%xmm2
vpclmulqdq $0x00,%xmm0,%xmm1,%xmm6
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm7,%xmm6,%xmm6
vpunpckhqdq %xmm2,%xmm2,%xmm7
vpclmulqdq $0x11,%xmm0,%xmm1,%xmm1
vpxor %xmm2,%xmm7,%xmm7
vpxor %xmm9,%xmm1,%xmm1
vpclmulqdq $0x10,%xmm15,%xmm4,%xmm4
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm5,%xmm4,%xmm4
vpxor 112(%rsp),%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm2,%xmm5
vmovdqu 112-32(%r9),%xmm0
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpxor %xmm6,%xmm5,%xmm5
vpclmulqdq $0x11,%xmm3,%xmm2,%xmm2
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm15,%xmm7,%xmm7
vpxor %xmm4,%xmm7,%xmm4
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm6
vmovdqu 0-32(%r9),%xmm3
vpunpckhqdq %xmm14,%xmm14,%xmm1
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm8
vpxor %xmm14,%xmm1,%xmm1
vpxor %xmm5,%xmm6,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm9
vmovdqu 32-32(%r9),%xmm15
vpxor %xmm2,%xmm8,%xmm7
vpxor %xmm4,%xmm9,%xmm6
vmovdqu 16-32(%r9),%xmm0
vpxor %xmm5,%xmm7,%xmm9
vpclmulqdq $0x00,%xmm3,%xmm14,%xmm4
vpxor %xmm9,%xmm6,%xmm6
vpunpckhqdq %xmm13,%xmm13,%xmm2
vpclmulqdq $0x11,%xmm3,%xmm14,%xmm14
vpxor %xmm13,%xmm2,%xmm2
vpslldq $8,%xmm6,%xmm9
vpclmulqdq $0x00,%xmm15,%xmm1,%xmm1
vpxor %xmm9,%xmm5,%xmm8
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm6,%xmm7,%xmm7
vpclmulqdq $0x00,%xmm0,%xmm13,%xmm5
vmovdqu 48-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm12,%xmm12,%xmm9
vpclmulqdq $0x11,%xmm0,%xmm13,%xmm13
vpxor %xmm12,%xmm9,%xmm9
vpxor %xmm14,%xmm13,%xmm13
vpalignr $8,%xmm8,%xmm8,%xmm14
vpclmulqdq $0x10,%xmm15,%xmm2,%xmm2
vmovdqu 80-32(%r9),%xmm15
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm3,%xmm12,%xmm4
vmovdqu 64-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm11,%xmm11,%xmm1
vpclmulqdq $0x11,%xmm3,%xmm12,%xmm12
vpxor %xmm11,%xmm1,%xmm1
vpxor %xmm13,%xmm12,%xmm12
vxorps 16(%rsp),%xmm7,%xmm7
vpclmulqdq $0x00,%xmm15,%xmm9,%xmm9
vpxor %xmm2,%xmm9,%xmm9
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm0,%xmm11,%xmm5
vmovdqu 96-32(%r9),%xmm3
vpxor %xmm4,%xmm5,%xmm5
vpunpckhqdq %xmm10,%xmm10,%xmm2
vpclmulqdq $0x11,%xmm0,%xmm11,%xmm11
vpxor %xmm10,%xmm2,%xmm2
vpalignr $8,%xmm8,%xmm8,%xmm14
vpxor %xmm12,%xmm11,%xmm11
vpclmulqdq $0x10,%xmm15,%xmm1,%xmm1
vmovdqu 128-32(%r9),%xmm15
vpxor %xmm9,%xmm1,%xmm1
vxorps %xmm7,%xmm14,%xmm14
vpclmulqdq $0x10,16(%r11),%xmm8,%xmm8
vxorps %xmm14,%xmm8,%xmm8
vpclmulqdq $0x00,%xmm3,%xmm10,%xmm4
vmovdqu 112-32(%r9),%xmm0
vpxor %xmm5,%xmm4,%xmm4
vpunpckhqdq %xmm8,%xmm8,%xmm9
vpclmulqdq $0x11,%xmm3,%xmm10,%xmm10
vpxor %xmm8,%xmm9,%xmm9
vpxor %xmm11,%xmm10,%xmm10
vpclmulqdq $0x00,%xmm15,%xmm2,%xmm2
vpxor %xmm1,%xmm2,%xmm2
vpclmulqdq $0x00,%xmm0,%xmm8,%xmm5
vpclmulqdq $0x11,%xmm0,%xmm8,%xmm7
vpxor %xmm4,%xmm5,%xmm5
vpclmulqdq $0x10,%xmm15,%xmm9,%xmm6
vpxor %xmm10,%xmm7,%xmm7
vpxor %xmm2,%xmm6,%xmm6
vpxor %xmm5,%xmm7,%xmm4
vpxor %xmm4,%xmm6,%xmm6
vpslldq $8,%xmm6,%xmm1
vmovdqu 16(%r11),%xmm3
vpsrldq $8,%xmm6,%xmm6
vpxor %xmm1,%xmm5,%xmm8
vpxor %xmm6,%xmm7,%xmm7
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm2,%xmm8,%xmm8
vpalignr $8,%xmm8,%xmm8,%xmm2
vpclmulqdq $0x10,%xmm3,%xmm8,%xmm8
vpxor %xmm7,%xmm2,%xmm2
vpxor %xmm2,%xmm8,%xmm8
vpshufb (%r11),%xmm8,%xmm8
vmovdqu %xmm8,-64(%r9)
vzeroupper
movq -48(%rax),%r15
movq -40(%rax),%r14
movq -32(%rax),%r13
movq -24(%rax),%r12
movq -16(%rax),%rbp
movq -8(%rax),%rbx
leaq (%rax),%rsp
L$gcm_enc_abort:
movq %r10,%rax
.byte 0xf3,0xc3
.p2align 6
L$bswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
L$poly:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
L$one_msb:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
L$two_lsb:
.byte 2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
L$one_lsb:
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
.byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
#endif
#endif // defined(__x86_64__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,797 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__APPLE__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.code 32
#undef __thumb2__
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key
#ifdef __thumb2__
.thumb_func _aes_hw_set_encrypt_key
#endif
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
mov r3,#-1
cmp r0,#0
beq Lenc_key_abort
cmp r2,#0
beq Lenc_key_abort
mov r3,#-2
cmp r1,#128
blt Lenc_key_abort
cmp r1,#256
bgt Lenc_key_abort
tst r1,#0x3f
bne Lenc_key_abort
adr r3,Lrcon
cmp r1,#192
veor q0,q0,q0
vld1.8 {q3},[r0]!
mov r1,#8 @ reuse r1
vld1.32 {q1,q2},[r3]!
blt Loop128
beq L192
b L256
.align 4
Loop128:
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
bne Loop128
vld1.32 {q1},[r3]
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
veor q3,q3,q10
vst1.32 {q3},[r2]
add r2,r2,#0x50
mov r12,#10
b Ldone
.align 4
L192:
vld1.8 {d16},[r0]!
vmov.i8 q10,#8 @ borrow q10
vst1.32 {q3},[r2]!
vsub.i8 q2,q2,q10 @ adjust the mask
Loop192:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {d16},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vdup.32 q9,d7[1]
veor q9,q9,q8
veor q10,q10,q1
vext.8 q8,q0,q8,#12
vshl.u8 q1,q1,#1
veor q8,q8,q9
veor q3,q3,q10
veor q8,q8,q10
vst1.32 {q3},[r2]!
bne Loop192
mov r12,#12
add r2,r2,#0x20
b Ldone
.align 4
L256:
vld1.8 {q8},[r0]
mov r1,#7
mov r12,#14
vst1.32 {q3},[r2]!
Loop256:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {q8},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vst1.32 {q3},[r2]!
beq Ldone
vdup.32 q10,d7[1]
vext.8 q9,q0,q8,#12
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
veor q8,q8,q10
b Loop256
Ldone:
str r12,[r2]
mov r3,#0
Lenc_key_abort:
mov r0,r3 @ return value
bx lr
.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key
#ifdef __thumb2__
.thumb_func _aes_hw_set_decrypt_key
#endif
.align 5
_aes_hw_set_decrypt_key:
stmdb sp!,{r4,lr}
bl Lenc_key
cmp r0,#0
bne Ldec_key_abort
sub r2,r2,#240 @ restore original r2
mov r4,#-16
add r0,r2,r12,lsl#4 @ end of key schedule
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
Loop_imc:
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
cmp r0,r2
bhi Loop_imc
vld1.32 {q0},[r2]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
vst1.32 {q0},[r0]
eor r0,r0,r0 @ return value
Ldec_key_abort:
ldmia sp!,{r4,pc}
.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt
#ifdef __thumb2__
.thumb_func _aes_hw_encrypt
#endif
.align 5
_aes_hw_encrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
Loop_enc:
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q1},[r2]!
bgt Loop_enc
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.globl _aes_hw_decrypt
.private_extern _aes_hw_decrypt
#ifdef __thumb2__
.thumb_func _aes_hw_decrypt
#endif
.align 5
_aes_hw_decrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
Loop_dec:
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q1},[r2]!
bgt Loop_dec
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt
#ifdef __thumb2__
.thumb_func _aes_hw_cbc_encrypt
#endif
.align 5
_aes_hw_cbc_encrypt:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load remaining args
subs r2,r2,#16
mov r8,#16
blo Lcbc_abort
moveq r8,#0
cmp r5,#0 @ en- or decrypting?
ldr r5,[r3,#240]
and r2,r2,#-16
vld1.8 {q6},[r4]
vld1.8 {q0},[r0],r8
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#6
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
sub r5,r5,#2
vld1.32 {q10,q11},[r7]!
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
beq Lcbc_dec
cmp r5,#2
veor q0,q0,q6
veor q5,q8,q7
beq Lcbc_enc128
vld1.32 {q2,q3},[r7]
add r7,r3,#16
add r6,r3,#16*4
add r12,r3,#16*5
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
add r14,r3,#16*6
add r3,r3,#16*7
b Lenter_cbc_enc
.align 4
Loop_cbc_enc:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
Lenter_cbc_enc:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r6]
cmp r5,#4
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r12]
beq Lcbc_enc192
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r14]
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r3]
nop
Lcbc_enc192:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs Loop_cbc_enc
vst1.8 {q6},[r1]!
b Lcbc_done
.align 5
Lcbc_enc128:
vld1.32 {q2,q3},[r7]
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
b Lenter_cbc_enc128
Loop_cbc_enc128:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
Lenter_cbc_enc128:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs Loop_cbc_enc128
vst1.8 {q6},[r1]!
b Lcbc_done
.align 5
Lcbc_dec:
vld1.8 {q10},[r0]!
subs r2,r2,#32 @ bias
add r6,r5,#2
vorr q3,q0,q0
vorr q1,q0,q0
vorr q11,q10,q10
blo Lcbc_dec_tail
vorr q1,q10,q10
vld1.8 {q10},[r0]!
vorr q2,q0,q0
vorr q3,q1,q1
vorr q11,q10,q10
Loop3x_cbc_dec:
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt Loop3x_cbc_dec
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q4,q6,q7
subs r2,r2,#0x30
veor q5,q2,q7
movlo r6,r2 @ r6, r6, is zero at this point
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
add r0,r0,r6 @ r0 is adjusted in such way that
@ at exit from the loop q1-q10
@ are loaded with last "words"
vorr q6,q11,q11
mov r7,r3
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q2},[r0]!
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q3},[r0]!
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q11},[r0]!
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
add r6,r5,#2
veor q4,q4,q0
veor q5,q5,q1
veor q10,q10,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q4},[r1]!
vorr q0,q2,q2
vst1.8 {q5},[r1]!
vorr q1,q3,q3
vst1.8 {q10},[r1]!
vorr q10,q11,q11
bhs Loop3x_cbc_dec
cmn r2,#0x30
beq Lcbc_done
nop
Lcbc_dec_tail:
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt Lcbc_dec_tail
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
cmn r2,#0x20
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q5,q6,q7
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
beq Lcbc_dec_one
veor q5,q5,q1
veor q9,q9,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
vst1.8 {q9},[r1]!
b Lcbc_done
Lcbc_dec_one:
veor q5,q5,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
Lcbc_done:
vst1.8 {q6},[r4]
Lcbc_abort:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,pc}
.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks
#ifdef __thumb2__
.thumb_func _aes_hw_ctr32_encrypt_blocks
#endif
.align 5
_aes_hw_ctr32_encrypt_blocks:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldr r4, [ip] @ load remaining arg
ldr r5,[r3,#240]
ldr r8, [r4, #12]
vld1.32 {q0},[r4]
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#4
mov r12,#16
cmp r2,#2
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
sub r5,r5,#2
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
movlo r12,#0
#ifndef __ARMEB__
rev r8, r8
#endif
vorr q1,q0,q0
add r10, r8, #1
vorr q10,q0,q0
add r8, r8, #2
vorr q6,q0,q0
rev r10, r10
vmov.32 d3[1],r10
bls Lctr32_tail
rev r12, r8
sub r2,r2,#3 @ bias
vmov.32 d21[1],r12
b Loop3x_ctr32
.align 4
Loop3x_ctr32:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q9},[r7]!
bgt Loop3x_ctr32
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
vld1.8 {q2},[r0]!
vorr q0,q6,q6
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.8 {q3},[r0]!
vorr q1,q6,q6
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vld1.8 {q11},[r0]!
mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
vorr q10,q6,q6
add r9,r8,#1
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q2,q2,q7
add r10,r8,#2
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
veor q3,q3,q7
add r8,r8,#3
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q11,q11,q7
rev r9,r9
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d1[1], r9
rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vmov.32 d3[1], r10
rev r12,r8
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d21[1], r12
subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
veor q2,q2,q4
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
vst1.8 {q2},[r1]!
veor q3,q3,q5
mov r6,r5
vst1.8 {q3},[r1]!
veor q11,q11,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q11},[r1]!
bhs Loop3x_ctr32
adds r2,r2,#3
beq Lctr32_done
cmp r2,#1
mov r12,#16
moveq r12,#0
Lctr32_tail:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q9},[r7]!
bgt Lctr32_tail
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q2},[r0],r12
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q3},[r0]
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q2,q2,q7
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q3,q3,q7
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
cmp r2,#1
veor q2,q2,q0
veor q3,q3,q1
vst1.8 {q2},[r1]!
beq Lctr32_done
vst1.8 {q3},[r1]
Lctr32_done:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
#endif
#endif // !OPENSSL_NO_ASM
#endif // defined(__arm__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,788 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__linux__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.arch armv7-a @ don't confuse not-so-latest binutils with argv8 :-)
.fpu neon
.code 32
#undef __thumb2__
.align 5
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d @ rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function
.align 5
aes_hw_set_encrypt_key:
.Lenc_key:
mov r3,#-1
cmp r0,#0
beq .Lenc_key_abort
cmp r2,#0
beq .Lenc_key_abort
mov r3,#-2
cmp r1,#128
blt .Lenc_key_abort
cmp r1,#256
bgt .Lenc_key_abort
tst r1,#0x3f
bne .Lenc_key_abort
adr r3,.Lrcon
cmp r1,#192
veor q0,q0,q0
vld1.8 {q3},[r0]!
mov r1,#8 @ reuse r1
vld1.32 {q1,q2},[r3]!
blt .Loop128
beq .L192
b .L256
.align 4
.Loop128:
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
bne .Loop128
vld1.32 {q1},[r3]
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vtbl.8 d20,{q3},d4
vtbl.8 d21,{q3},d5
vext.8 q9,q0,q3,#12
vst1.32 {q3},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
veor q3,q3,q10
vst1.32 {q3},[r2]
add r2,r2,#0x50
mov r12,#10
b .Ldone
.align 4
.L192:
vld1.8 {d16},[r0]!
vmov.i8 q10,#8 @ borrow q10
vst1.32 {q3},[r2]!
vsub.i8 q2,q2,q10 @ adjust the mask
.Loop192:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {d16},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vdup.32 q9,d7[1]
veor q9,q9,q8
veor q10,q10,q1
vext.8 q8,q0,q8,#12
vshl.u8 q1,q1,#1
veor q8,q8,q9
veor q3,q3,q10
veor q8,q8,q10
vst1.32 {q3},[r2]!
bne .Loop192
mov r12,#12
add r2,r2,#0x20
b .Ldone
.align 4
.L256:
vld1.8 {q8},[r0]
mov r1,#7
mov r12,#14
vst1.32 {q3},[r2]!
.Loop256:
vtbl.8 d20,{q8},d4
vtbl.8 d21,{q8},d5
vext.8 q9,q0,q3,#12
vst1.32 {q8},[r2]!
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
subs r1,r1,#1
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q3,q3,q9
vext.8 q9,q0,q9,#12
veor q10,q10,q1
veor q3,q3,q9
vshl.u8 q1,q1,#1
veor q3,q3,q10
vst1.32 {q3},[r2]!
beq .Ldone
vdup.32 q10,d7[1]
vext.8 q9,q0,q8,#12
.byte 0x00,0x43,0xf0,0xf3 @ aese q10,q0
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
vext.8 q9,q0,q9,#12
veor q8,q8,q9
veor q8,q8,q10
b .Loop256
.Ldone:
str r12,[r2]
mov r3,#0
.Lenc_key_abort:
mov r0,r3 @ return value
bx lr
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.globl aes_hw_set_decrypt_key
.hidden aes_hw_set_decrypt_key
.type aes_hw_set_decrypt_key,%function
.align 5
aes_hw_set_decrypt_key:
stmdb sp!,{r4,lr}
bl .Lenc_key
cmp r0,#0
bne .Ldec_key_abort
sub r2,r2,#240 @ restore original r2
mov r4,#-16
add r0,r2,r12,lsl#4 @ end of key schedule
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
.Loop_imc:
vld1.32 {q0},[r2]
vld1.32 {q1},[r0]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
vst1.32 {q0},[r0],r4
vst1.32 {q1},[r2]!
cmp r0,r2
bhi .Loop_imc
vld1.32 {q0},[r2]
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
vst1.32 {q0},[r0]
eor r0,r0,r0 @ return value
.Ldec_key_abort:
ldmia sp!,{r4,pc}
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_hw_encrypt
.hidden aes_hw_encrypt
.type aes_hw_encrypt,%function
.align 5
aes_hw_encrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
.Loop_enc:
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q1},[r2]!
bgt .Loop_enc
.byte 0x00,0x43,0xb0,0xf3 @ aese q2,q0
.byte 0x84,0x43,0xb0,0xf3 @ aesmc q2,q2
vld1.32 {q0},[r2]
.byte 0x02,0x43,0xb0,0xf3 @ aese q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.size aes_hw_encrypt,.-aes_hw_encrypt
.globl aes_hw_decrypt
.hidden aes_hw_decrypt
.type aes_hw_decrypt,%function
.align 5
aes_hw_decrypt:
ldr r3,[r2,#240]
vld1.32 {q0},[r2]!
vld1.8 {q2},[r0]
sub r3,r3,#2
vld1.32 {q1},[r2]!
.Loop_dec:
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]!
subs r3,r3,#2
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q1},[r2]!
bgt .Loop_dec
.byte 0x40,0x43,0xb0,0xf3 @ aesd q2,q0
.byte 0xc4,0x43,0xb0,0xf3 @ aesimc q2,q2
vld1.32 {q0},[r2]
.byte 0x42,0x43,0xb0,0xf3 @ aesd q2,q1
veor q2,q2,q0
vst1.8 {q2},[r1]
bx lr
.size aes_hw_decrypt,.-aes_hw_decrypt
.globl aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type aes_hw_cbc_encrypt,%function
.align 5
aes_hw_cbc_encrypt:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load remaining args
subs r2,r2,#16
mov r8,#16
blo .Lcbc_abort
moveq r8,#0
cmp r5,#0 @ en- or decrypting?
ldr r5,[r3,#240]
and r2,r2,#-16
vld1.8 {q6},[r4]
vld1.8 {q0},[r0],r8
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#6
add r7,r3,r5,lsl#4 @ pointer to last 7 round keys
sub r5,r5,#2
vld1.32 {q10,q11},[r7]!
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
beq .Lcbc_dec
cmp r5,#2
veor q0,q0,q6
veor q5,q8,q7
beq .Lcbc_enc128
vld1.32 {q2,q3},[r7]
add r7,r3,#16
add r6,r3,#16*4
add r12,r3,#16*5
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
add r14,r3,#16*6
add r3,r3,#16*7
b .Lenter_cbc_enc
.align 4
.Loop_cbc_enc:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
.Lenter_cbc_enc:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r6]
cmp r5,#4
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r12]
beq .Lcbc_enc192
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q8},[r14]
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r3]
nop
.Lcbc_enc192:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.32 {q9},[r7] @ re-pre-load rndkey[1]
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs .Loop_cbc_enc
vst1.8 {q6},[r1]!
b .Lcbc_done
.align 5
.Lcbc_enc128:
vld1.32 {q2,q3},[r7]
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vst1.8 {q6},[r1]!
.Lenter_cbc_enc128:
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
subs r2,r2,#16
.byte 0x04,0x03,0xb0,0xf3 @ aese q0,q2
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
moveq r8,#0
.byte 0x06,0x03,0xb0,0xf3 @ aese q0,q3
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x24,0x03,0xb0,0xf3 @ aese q0,q10
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x26,0x03,0xb0,0xf3 @ aese q0,q11
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
vld1.8 {q8},[r0],r8
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
veor q8,q8,q5
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
veor q6,q0,q7
bhs .Loop_cbc_enc128
vst1.8 {q6},[r1]!
b .Lcbc_done
.align 5
.Lcbc_dec:
vld1.8 {q10},[r0]!
subs r2,r2,#32 @ bias
add r6,r5,#2
vorr q3,q0,q0
vorr q1,q0,q0
vorr q11,q10,q10
blo .Lcbc_dec_tail
vorr q1,q10,q10
vld1.8 {q10},[r0]!
vorr q2,q0,q0
vorr q3,q1,q1
vorr q11,q10,q10
.Loop3x_cbc_dec:
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt .Loop3x_cbc_dec
.byte 0x60,0x03,0xb0,0xf3 @ aesd q0,q8
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q4,q6,q7
subs r2,r2,#0x30
veor q5,q2,q7
movlo r6,r2 @ r6, r6, is zero at this point
.byte 0x62,0x03,0xb0,0xf3 @ aesd q0,q9
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
add r0,r0,r6 @ r0 is adjusted in such way that
@ at exit from the loop q1-q10
@ are loaded with last "words"
vorr q6,q11,q11
mov r7,r3
.byte 0x68,0x03,0xb0,0xf3 @ aesd q0,q12
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q2},[r0]!
.byte 0x6a,0x03,0xb0,0xf3 @ aesd q0,q13
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q3},[r0]!
.byte 0x6c,0x03,0xb0,0xf3 @ aesd q0,q14
.byte 0xc0,0x03,0xb0,0xf3 @ aesimc q0,q0
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.8 {q11},[r0]!
.byte 0x6e,0x03,0xb0,0xf3 @ aesd q0,q15
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
add r6,r5,#2
veor q4,q4,q0
veor q5,q5,q1
veor q10,q10,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q4},[r1]!
vorr q0,q2,q2
vst1.8 {q5},[r1]!
vorr q1,q3,q3
vst1.8 {q10},[r1]!
vorr q10,q11,q11
bhs .Loop3x_cbc_dec
cmn r2,#0x30
beq .Lcbc_done
nop
.Lcbc_dec_tail:
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
vld1.32 {q9},[r7]!
bgt .Lcbc_dec_tail
.byte 0x60,0x23,0xb0,0xf3 @ aesd q1,q8
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x60,0x43,0xf0,0xf3 @ aesd q10,q8
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x62,0x23,0xb0,0xf3 @ aesd q1,q9
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x62,0x43,0xf0,0xf3 @ aesd q10,q9
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
.byte 0x68,0x23,0xb0,0xf3 @ aesd q1,q12
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x68,0x43,0xf0,0xf3 @ aesd q10,q12
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
cmn r2,#0x20
.byte 0x6a,0x23,0xb0,0xf3 @ aesd q1,q13
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6a,0x43,0xf0,0xf3 @ aesd q10,q13
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q5,q6,q7
.byte 0x6c,0x23,0xb0,0xf3 @ aesd q1,q14
.byte 0xc2,0x23,0xb0,0xf3 @ aesimc q1,q1
.byte 0x6c,0x43,0xf0,0xf3 @ aesd q10,q14
.byte 0xe4,0x43,0xf0,0xf3 @ aesimc q10,q10
veor q9,q3,q7
.byte 0x6e,0x23,0xb0,0xf3 @ aesd q1,q15
.byte 0x6e,0x43,0xf0,0xf3 @ aesd q10,q15
beq .Lcbc_dec_one
veor q5,q5,q1
veor q9,q9,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
vst1.8 {q9},[r1]!
b .Lcbc_done
.Lcbc_dec_one:
veor q5,q5,q10
vorr q6,q11,q11
vst1.8 {q5},[r1]!
.Lcbc_done:
vst1.8 {q6},[r4]
.Lcbc_abort:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,pc}
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
.globl aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
aes_hw_ctr32_encrypt_blocks:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldr r4, [ip] @ load remaining arg
ldr r5,[r3,#240]
ldr r8, [r4, #12]
vld1.32 {q0},[r4]
vld1.32 {q8,q9},[r3] @ load key schedule...
sub r5,r5,#4
mov r12,#16
cmp r2,#2
add r7,r3,r5,lsl#4 @ pointer to last 5 round keys
sub r5,r5,#2
vld1.32 {q12,q13},[r7]!
vld1.32 {q14,q15},[r7]!
vld1.32 {q7},[r7]
add r7,r3,#32
mov r6,r5
movlo r12,#0
#ifndef __ARMEB__
rev r8, r8
#endif
vorr q1,q0,q0
add r10, r8, #1
vorr q10,q0,q0
add r8, r8, #2
vorr q6,q0,q0
rev r10, r10
vmov.32 d3[1],r10
bls .Lctr32_tail
rev r12, r8
sub r2,r2,#3 @ bias
vmov.32 d21[1],r12
b .Loop3x_ctr32
.align 4
.Loop3x_ctr32:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.32 {q9},[r7]!
bgt .Loop3x_ctr32
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x83,0xb0,0xf3 @ aesmc q4,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0xa3,0xb0,0xf3 @ aesmc q5,q1
vld1.8 {q2},[r0]!
vorr q0,q6,q6
.byte 0x20,0x43,0xf0,0xf3 @ aese q10,q8
.byte 0xa4,0x43,0xf0,0xf3 @ aesmc q10,q10
vld1.8 {q3},[r0]!
vorr q1,q6,q6
.byte 0x22,0x83,0xb0,0xf3 @ aese q4,q9
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x22,0xa3,0xb0,0xf3 @ aese q5,q9
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vld1.8 {q11},[r0]!
mov r7,r3
.byte 0x22,0x43,0xf0,0xf3 @ aese q10,q9
.byte 0xa4,0x23,0xf0,0xf3 @ aesmc q9,q10
vorr q10,q6,q6
add r9,r8,#1
.byte 0x28,0x83,0xb0,0xf3 @ aese q4,q12
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x28,0xa3,0xb0,0xf3 @ aese q5,q12
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q2,q2,q7
add r10,r8,#2
.byte 0x28,0x23,0xf0,0xf3 @ aese q9,q12
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
veor q3,q3,q7
add r8,r8,#3
.byte 0x2a,0x83,0xb0,0xf3 @ aese q4,q13
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2a,0xa3,0xb0,0xf3 @ aese q5,q13
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
veor q11,q11,q7
rev r9,r9
.byte 0x2a,0x23,0xf0,0xf3 @ aese q9,q13
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d1[1], r9
rev r10,r10
.byte 0x2c,0x83,0xb0,0xf3 @ aese q4,q14
.byte 0x88,0x83,0xb0,0xf3 @ aesmc q4,q4
.byte 0x2c,0xa3,0xb0,0xf3 @ aese q5,q14
.byte 0x8a,0xa3,0xb0,0xf3 @ aesmc q5,q5
vmov.32 d3[1], r10
rev r12,r8
.byte 0x2c,0x23,0xf0,0xf3 @ aese q9,q14
.byte 0xa2,0x23,0xf0,0xf3 @ aesmc q9,q9
vmov.32 d21[1], r12
subs r2,r2,#3
.byte 0x2e,0x83,0xb0,0xf3 @ aese q4,q15
.byte 0x2e,0xa3,0xb0,0xf3 @ aese q5,q15
.byte 0x2e,0x23,0xf0,0xf3 @ aese q9,q15
veor q2,q2,q4
vld1.32 {q8},[r7]! @ re-pre-load rndkey[0]
vst1.8 {q2},[r1]!
veor q3,q3,q5
mov r6,r5
vst1.8 {q3},[r1]!
veor q11,q11,q9
vld1.32 {q9},[r7]! @ re-pre-load rndkey[1]
vst1.8 {q11},[r1]!
bhs .Loop3x_ctr32
adds r2,r2,#3
beq .Lctr32_done
cmp r2,#1
mov r12,#16
moveq r12,#0
.Lctr32_tail:
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q8},[r7]!
subs r6,r6,#2
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.32 {q9},[r7]!
bgt .Lctr32_tail
.byte 0x20,0x03,0xb0,0xf3 @ aese q0,q8
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x20,0x23,0xb0,0xf3 @ aese q1,q8
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
.byte 0x22,0x03,0xb0,0xf3 @ aese q0,q9
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x22,0x23,0xb0,0xf3 @ aese q1,q9
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q2},[r0],r12
.byte 0x28,0x03,0xb0,0xf3 @ aese q0,q12
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x28,0x23,0xb0,0xf3 @ aese q1,q12
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
vld1.8 {q3},[r0]
.byte 0x2a,0x03,0xb0,0xf3 @ aese q0,q13
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2a,0x23,0xb0,0xf3 @ aese q1,q13
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q2,q2,q7
.byte 0x2c,0x03,0xb0,0xf3 @ aese q0,q14
.byte 0x80,0x03,0xb0,0xf3 @ aesmc q0,q0
.byte 0x2c,0x23,0xb0,0xf3 @ aese q1,q14
.byte 0x82,0x23,0xb0,0xf3 @ aesmc q1,q1
veor q3,q3,q7
.byte 0x2e,0x03,0xb0,0xf3 @ aese q0,q15
.byte 0x2e,0x23,0xb0,0xf3 @ aese q1,q15
cmp r2,#1
veor q2,q2,q0
veor q3,q3,q1
vst1.8 {q2},[r1]!
beq .Lctr32_done
vst1.8 {q3},[r1]
.Lctr32_done:
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__arm__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,779 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__aarch64__) && defined(__APPLE__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.section __TEXT,__const
.align 5
Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl _aes_hw_set_encrypt_key
.private_extern _aes_hw_set_encrypt_key
.align 5
_aes_hw_set_encrypt_key:
Lenc_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
cmp x0,#0
b.eq Lenc_key_abort
cmp x2,#0
b.eq Lenc_key_abort
mov x3,#-2
cmp w1,#128
b.lt Lenc_key_abort
cmp w1,#256
b.gt Lenc_key_abort
tst w1,#0x3f
b.ne Lenc_key_abort
adrp x3,Lrcon@PAGE
add x3,x3,Lrcon@PAGEOFF
cmp w1,#192
eor v0.16b,v0.16b,v0.16b
ld1 {v3.16b},[x0],#16
mov w1,#8 // reuse w1
ld1 {v1.4s,v2.4s},[x3],#32
b.lt Loop128
b.eq L192
b L256
.align 4
Loop128:
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
b.ne Loop128
ld1 {v1.4s},[x3]
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2]
add x2,x2,#0x50
mov w12,#10
b Ldone
.align 4
L192:
ld1 {v4.8b},[x0],#8
movi v6.16b,#8 // borrow v6.16b
st1 {v3.4s},[x2],#16
sub v2.16b,v2.16b,v6.16b // adjust the mask
Loop192:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.8b},[x2],#8
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
dup v5.4s,v3.s[3]
eor v5.16b,v5.16b,v4.16b
eor v6.16b,v6.16b,v1.16b
ext v4.16b,v0.16b,v4.16b,#12
shl v1.16b,v1.16b,#1
eor v4.16b,v4.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
eor v4.16b,v4.16b,v6.16b
st1 {v3.4s},[x2],#16
b.ne Loop192
mov w12,#12
add x2,x2,#0x20
b Ldone
.align 4
L256:
ld1 {v4.16b},[x0]
mov w1,#7
mov w12,#14
st1 {v3.4s},[x2],#16
Loop256:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2],#16
b.eq Ldone
dup v6.4s,v3.s[3] // just splat
ext v5.16b,v0.16b,v4.16b,#12
aese v6.16b,v0.16b
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
eor v4.16b,v4.16b,v6.16b
b Loop256
Ldone:
str w12,[x2]
mov x3,#0
Lenc_key_abort:
mov x0,x3 // return value
ldr x29,[sp],#16
ret
.globl _aes_hw_set_decrypt_key
.private_extern _aes_hw_set_decrypt_key
.align 5
_aes_hw_set_decrypt_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl Lenc_key
cmp x0,#0
b.ne Ldec_key_abort
sub x2,x2,#240 // restore original x2
mov x4,#-16
add x0,x2,x12,lsl#4 // end of key schedule
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
Loop_imc:
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
aesimc v0.16b,v0.16b
aesimc v1.16b,v1.16b
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
cmp x0,x2
b.hi Loop_imc
ld1 {v0.4s},[x2]
aesimc v0.16b,v0.16b
st1 {v0.4s},[x0]
eor x0,x0,x0 // return value
Ldec_key_abort:
ldp x29,x30,[sp],#16
ret
.globl _aes_hw_encrypt
.private_extern _aes_hw_encrypt
.align 5
_aes_hw_encrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
Loop_enc:
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aese v2.16b,v1.16b
aesmc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt Loop_enc
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aese v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.globl _aes_hw_decrypt
.private_extern _aes_hw_decrypt
.align 5
_aes_hw_decrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
Loop_dec:
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aesd v2.16b,v1.16b
aesimc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt Loop_dec
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aesd v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.globl _aes_hw_cbc_encrypt
.private_extern _aes_hw_cbc_encrypt
.align 5
_aes_hw_cbc_encrypt:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
mov x8,#16
b.lo Lcbc_abort
csel x8,xzr,x8,eq
cmp w5,#0 // en- or decrypting?
ldr w5,[x3,#240]
and x2,x2,#-16
ld1 {v6.16b},[x4]
ld1 {v0.16b},[x0],x8
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#6
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
sub w5,w5,#2
ld1 {v18.4s,v19.4s},[x7],#32
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
b.eq Lcbc_dec
cmp w5,#2
eor v0.16b,v0.16b,v6.16b
eor v5.16b,v16.16b,v7.16b
b.eq Lcbc_enc128
ld1 {v2.4s,v3.4s},[x7]
add x7,x3,#16
add x6,x3,#16*4
add x12,x3,#16*5
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
add x14,x3,#16*6
add x3,x3,#16*7
b Lenter_cbc_enc
.align 4
Loop_cbc_enc:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
Lenter_cbc_enc:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x6]
cmp w5,#4
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x12]
b.eq Lcbc_enc192
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x14]
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x3]
nop
Lcbc_enc192:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs Loop_cbc_enc
st1 {v6.16b},[x1],#16
b Lcbc_done
.align 5
Lcbc_enc128:
ld1 {v2.4s,v3.4s},[x7]
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
b Lenter_cbc_enc128
Loop_cbc_enc128:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
Lenter_cbc_enc128:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs Loop_cbc_enc128
st1 {v6.16b},[x1],#16
b Lcbc_done
.align 5
Lcbc_dec:
ld1 {v18.16b},[x0],#16
subs x2,x2,#32 // bias
add w6,w5,#2
orr v3.16b,v0.16b,v0.16b
orr v1.16b,v0.16b,v0.16b
orr v19.16b,v18.16b,v18.16b
b.lo Lcbc_dec_tail
orr v1.16b,v18.16b,v18.16b
ld1 {v18.16b},[x0],#16
orr v2.16b,v0.16b,v0.16b
orr v3.16b,v1.16b,v1.16b
orr v19.16b,v18.16b,v18.16b
Loop3x_cbc_dec:
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Loop3x_cbc_dec
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
eor v4.16b,v6.16b,v7.16b
subs x2,x2,#0x30
eor v5.16b,v2.16b,v7.16b
csel x6,x2,x6,lo // x6, w6, is zero at this point
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
add x0,x0,x6 // x0 is adjusted in such way that
// at exit from the loop v1.16b-v18.16b
// are loaded with last "words"
orr v6.16b,v19.16b,v19.16b
mov x7,x3
aesd v0.16b,v20.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
ld1 {v2.16b},[x0],#16
aesd v0.16b,v21.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
aesd v0.16b,v22.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
ld1 {v19.16b},[x0],#16
aesd v0.16b,v23.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
add w6,w5,#2
eor v4.16b,v4.16b,v0.16b
eor v5.16b,v5.16b,v1.16b
eor v18.16b,v18.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v4.16b},[x1],#16
orr v0.16b,v2.16b,v2.16b
st1 {v5.16b},[x1],#16
orr v1.16b,v3.16b,v3.16b
st1 {v18.16b},[x1],#16
orr v18.16b,v19.16b,v19.16b
b.hs Loop3x_cbc_dec
cmn x2,#0x30
b.eq Lcbc_done
nop
Lcbc_dec_tail:
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Lcbc_dec_tail
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
cmn x2,#0x20
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
eor v5.16b,v6.16b,v7.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
b.eq Lcbc_dec_one
eor v5.16b,v5.16b,v1.16b
eor v17.16b,v17.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
st1 {v17.16b},[x1],#16
b Lcbc_done
Lcbc_dec_one:
eor v5.16b,v5.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
Lcbc_done:
st1 {v6.16b},[x4]
Lcbc_abort:
ldr x29,[sp],#16
ret
.globl _aes_hw_ctr32_encrypt_blocks
.private_extern _aes_hw_ctr32_encrypt_blocks
.align 5
_aes_hw_ctr32_encrypt_blocks:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]
ldr w8, [x4, #12]
ld1 {v0.4s},[x4]
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#4
mov x12,#16
cmp x2,#2
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
sub w5,w5,#2
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
csel x12,xzr,x12,lo
#ifndef __ARMEB__
rev w8, w8
#endif
orr v1.16b,v0.16b,v0.16b
add w10, w8, #1
orr v18.16b,v0.16b,v0.16b
add w8, w8, #2
orr v6.16b,v0.16b,v0.16b
rev w10, w10
mov v1.s[3],w10
b.ls Lctr32_tail
rev w12, w8
sub x2,x2,#3 // bias
mov v18.s[3],w12
b Loop3x_ctr32
.align 4
Loop3x_ctr32:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
aese v18.16b,v17.16b
aesmc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt Loop3x_ctr32
aese v0.16b,v16.16b
aesmc v4.16b,v0.16b
aese v1.16b,v16.16b
aesmc v5.16b,v1.16b
ld1 {v2.16b},[x0],#16
orr v0.16b,v6.16b,v6.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
orr v1.16b,v6.16b,v6.16b
aese v4.16b,v17.16b
aesmc v4.16b,v4.16b
aese v5.16b,v17.16b
aesmc v5.16b,v5.16b
ld1 {v19.16b},[x0],#16
mov x7,x3
aese v18.16b,v17.16b
aesmc v17.16b,v18.16b
orr v18.16b,v6.16b,v6.16b
add w9,w8,#1
aese v4.16b,v20.16b
aesmc v4.16b,v4.16b
aese v5.16b,v20.16b
aesmc v5.16b,v5.16b
eor v2.16b,v2.16b,v7.16b
add w10,w8,#2
aese v17.16b,v20.16b
aesmc v17.16b,v17.16b
eor v3.16b,v3.16b,v7.16b
add w8,w8,#3
aese v4.16b,v21.16b
aesmc v4.16b,v4.16b
aese v5.16b,v21.16b
aesmc v5.16b,v5.16b
eor v19.16b,v19.16b,v7.16b
rev w9,w9
aese v17.16b,v21.16b
aesmc v17.16b,v17.16b
mov v0.s[3], w9
rev w10,w10
aese v4.16b,v22.16b
aesmc v4.16b,v4.16b
aese v5.16b,v22.16b
aesmc v5.16b,v5.16b
mov v1.s[3], w10
rev w12,w8
aese v17.16b,v22.16b
aesmc v17.16b,v17.16b
mov v18.s[3], w12
subs x2,x2,#3
aese v4.16b,v23.16b
aese v5.16b,v23.16b
aese v17.16b,v23.16b
eor v2.16b,v2.16b,v4.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
st1 {v2.16b},[x1],#16
eor v3.16b,v3.16b,v5.16b
mov w6,w5
st1 {v3.16b},[x1],#16
eor v19.16b,v19.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v19.16b},[x1],#16
b.hs Loop3x_ctr32
adds x2,x2,#3
b.eq Lctr32_done
cmp x2,#1
mov x12,#16
csel x12,xzr,x12,eq
Lctr32_tail:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v17.4s},[x7],#16
b.gt Lctr32_tail
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v2.16b},[x0],x12
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v1.16b,v20.16b
aesmc v1.16b,v1.16b
ld1 {v3.16b},[x0]
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v1.16b,v21.16b
aesmc v1.16b,v1.16b
eor v2.16b,v2.16b,v7.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v1.16b,v22.16b
aesmc v1.16b,v1.16b
eor v3.16b,v3.16b,v7.16b
aese v0.16b,v23.16b
aese v1.16b,v23.16b
cmp x2,#1
eor v2.16b,v2.16b,v0.16b
eor v3.16b,v3.16b,v1.16b
st1 {v2.16b},[x1],#16
b.eq Lctr32_done
st1 {v3.16b},[x1]
Lctr32_done:
ldr x29,[sp],#16
ret
#endif
#endif // !OPENSSL_NO_ASM
#endif // defined(__aarch64__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,782 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__aarch64__) && defined(__linux__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
.arch armv8-a+crypto
.section .rodata
.align 5
.Lrcon:
.long 0x01,0x01,0x01,0x01
.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
.long 0x1b,0x1b,0x1b,0x1b
.text
.globl aes_hw_set_encrypt_key
.hidden aes_hw_set_encrypt_key
.type aes_hw_set_encrypt_key,%function
.align 5
aes_hw_set_encrypt_key:
.Lenc_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
mov x3,#-1
cmp x0,#0
b.eq .Lenc_key_abort
cmp x2,#0
b.eq .Lenc_key_abort
mov x3,#-2
cmp w1,#128
b.lt .Lenc_key_abort
cmp w1,#256
b.gt .Lenc_key_abort
tst w1,#0x3f
b.ne .Lenc_key_abort
adrp x3,.Lrcon
add x3,x3,:lo12:.Lrcon
cmp w1,#192
eor v0.16b,v0.16b,v0.16b
ld1 {v3.16b},[x0],#16
mov w1,#8 // reuse w1
ld1 {v1.4s,v2.4s},[x3],#32
b.lt .Loop128
b.eq .L192
b .L256
.align 4
.Loop128:
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
b.ne .Loop128
ld1 {v1.4s},[x3]
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
tbl v6.16b,{v3.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v3.4s},[x2],#16
aese v6.16b,v0.16b
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2]
add x2,x2,#0x50
mov w12,#10
b .Ldone
.align 4
.L192:
ld1 {v4.8b},[x0],#8
movi v6.16b,#8 // borrow v6.16b
st1 {v3.4s},[x2],#16
sub v2.16b,v2.16b,v6.16b // adjust the mask
.Loop192:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.8b},[x2],#8
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
dup v5.4s,v3.s[3]
eor v5.16b,v5.16b,v4.16b
eor v6.16b,v6.16b,v1.16b
ext v4.16b,v0.16b,v4.16b,#12
shl v1.16b,v1.16b,#1
eor v4.16b,v4.16b,v5.16b
eor v3.16b,v3.16b,v6.16b
eor v4.16b,v4.16b,v6.16b
st1 {v3.4s},[x2],#16
b.ne .Loop192
mov w12,#12
add x2,x2,#0x20
b .Ldone
.align 4
.L256:
ld1 {v4.16b},[x0]
mov w1,#7
mov w12,#14
st1 {v3.4s},[x2],#16
.Loop256:
tbl v6.16b,{v4.16b},v2.16b
ext v5.16b,v0.16b,v3.16b,#12
st1 {v4.4s},[x2],#16
aese v6.16b,v0.16b
subs w1,w1,#1
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v3.16b,v3.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v6.16b,v6.16b,v1.16b
eor v3.16b,v3.16b,v5.16b
shl v1.16b,v1.16b,#1
eor v3.16b,v3.16b,v6.16b
st1 {v3.4s},[x2],#16
b.eq .Ldone
dup v6.4s,v3.s[3] // just splat
ext v5.16b,v0.16b,v4.16b,#12
aese v6.16b,v0.16b
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
ext v5.16b,v0.16b,v5.16b,#12
eor v4.16b,v4.16b,v5.16b
eor v4.16b,v4.16b,v6.16b
b .Loop256
.Ldone:
str w12,[x2]
mov x3,#0
.Lenc_key_abort:
mov x0,x3 // return value
ldr x29,[sp],#16
ret
.size aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
.globl aes_hw_set_decrypt_key
.hidden aes_hw_set_decrypt_key
.type aes_hw_set_decrypt_key,%function
.align 5
aes_hw_set_decrypt_key:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
bl .Lenc_key
cmp x0,#0
b.ne .Ldec_key_abort
sub x2,x2,#240 // restore original x2
mov x4,#-16
add x0,x2,x12,lsl#4 // end of key schedule
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
.Loop_imc:
ld1 {v0.4s},[x2]
ld1 {v1.4s},[x0]
aesimc v0.16b,v0.16b
aesimc v1.16b,v1.16b
st1 {v0.4s},[x0],x4
st1 {v1.4s},[x2],#16
cmp x0,x2
b.hi .Loop_imc
ld1 {v0.4s},[x2]
aesimc v0.16b,v0.16b
st1 {v0.4s},[x0]
eor x0,x0,x0 // return value
.Ldec_key_abort:
ldp x29,x30,[sp],#16
ret
.size aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
.globl aes_hw_encrypt
.hidden aes_hw_encrypt
.type aes_hw_encrypt,%function
.align 5
aes_hw_encrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
.Loop_enc:
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aese v2.16b,v1.16b
aesmc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt .Loop_enc
aese v2.16b,v0.16b
aesmc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aese v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.size aes_hw_encrypt,.-aes_hw_encrypt
.globl aes_hw_decrypt
.hidden aes_hw_decrypt
.type aes_hw_decrypt,%function
.align 5
aes_hw_decrypt:
ldr w3,[x2,#240]
ld1 {v0.4s},[x2],#16
ld1 {v2.16b},[x0]
sub w3,w3,#2
ld1 {v1.4s},[x2],#16
.Loop_dec:
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2],#16
subs w3,w3,#2
aesd v2.16b,v1.16b
aesimc v2.16b,v2.16b
ld1 {v1.4s},[x2],#16
b.gt .Loop_dec
aesd v2.16b,v0.16b
aesimc v2.16b,v2.16b
ld1 {v0.4s},[x2]
aesd v2.16b,v1.16b
eor v2.16b,v2.16b,v0.16b
st1 {v2.16b},[x1]
ret
.size aes_hw_decrypt,.-aes_hw_decrypt
.globl aes_hw_cbc_encrypt
.hidden aes_hw_cbc_encrypt
.type aes_hw_cbc_encrypt,%function
.align 5
aes_hw_cbc_encrypt:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
subs x2,x2,#16
mov x8,#16
b.lo .Lcbc_abort
csel x8,xzr,x8,eq
cmp w5,#0 // en- or decrypting?
ldr w5,[x3,#240]
and x2,x2,#-16
ld1 {v6.16b},[x4]
ld1 {v0.16b},[x0],x8
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#6
add x7,x3,x5,lsl#4 // pointer to last 7 round keys
sub w5,w5,#2
ld1 {v18.4s,v19.4s},[x7],#32
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
b.eq .Lcbc_dec
cmp w5,#2
eor v0.16b,v0.16b,v6.16b
eor v5.16b,v16.16b,v7.16b
b.eq .Lcbc_enc128
ld1 {v2.4s,v3.4s},[x7]
add x7,x3,#16
add x6,x3,#16*4
add x12,x3,#16*5
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
add x14,x3,#16*6
add x3,x3,#16*7
b .Lenter_cbc_enc
.align 4
.Loop_cbc_enc:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
.Lenter_cbc_enc:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x6]
cmp w5,#4
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x12]
b.eq .Lcbc_enc192
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
ld1 {v16.4s},[x14]
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x3]
nop
.Lcbc_enc192:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs .Loop_cbc_enc
st1 {v6.16b},[x1],#16
b .Lcbc_done
.align 5
.Lcbc_enc128:
ld1 {v2.4s,v3.4s},[x7]
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
b .Lenter_cbc_enc128
.Loop_cbc_enc128:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
st1 {v6.16b},[x1],#16
.Lenter_cbc_enc128:
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
subs x2,x2,#16
aese v0.16b,v2.16b
aesmc v0.16b,v0.16b
csel x8,xzr,x8,eq
aese v0.16b,v3.16b
aesmc v0.16b,v0.16b
aese v0.16b,v18.16b
aesmc v0.16b,v0.16b
aese v0.16b,v19.16b
aesmc v0.16b,v0.16b
ld1 {v16.16b},[x0],x8
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
eor v16.16b,v16.16b,v5.16b
aese v0.16b,v23.16b
eor v6.16b,v0.16b,v7.16b
b.hs .Loop_cbc_enc128
st1 {v6.16b},[x1],#16
b .Lcbc_done
.align 5
.Lcbc_dec:
ld1 {v18.16b},[x0],#16
subs x2,x2,#32 // bias
add w6,w5,#2
orr v3.16b,v0.16b,v0.16b
orr v1.16b,v0.16b,v0.16b
orr v19.16b,v18.16b,v18.16b
b.lo .Lcbc_dec_tail
orr v1.16b,v18.16b,v18.16b
ld1 {v18.16b},[x0],#16
orr v2.16b,v0.16b,v0.16b
orr v3.16b,v1.16b,v1.16b
orr v19.16b,v18.16b,v18.16b
.Loop3x_cbc_dec:
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Loop3x_cbc_dec
aesd v0.16b,v16.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
eor v4.16b,v6.16b,v7.16b
subs x2,x2,#0x30
eor v5.16b,v2.16b,v7.16b
csel x6,x2,x6,lo // x6, w6, is zero at this point
aesd v0.16b,v17.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
add x0,x0,x6 // x0 is adjusted in such way that
// at exit from the loop v1.16b-v18.16b
// are loaded with last "words"
orr v6.16b,v19.16b,v19.16b
mov x7,x3
aesd v0.16b,v20.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
ld1 {v2.16b},[x0],#16
aesd v0.16b,v21.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
aesd v0.16b,v22.16b
aesimc v0.16b,v0.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
ld1 {v19.16b},[x0],#16
aesd v0.16b,v23.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
add w6,w5,#2
eor v4.16b,v4.16b,v0.16b
eor v5.16b,v5.16b,v1.16b
eor v18.16b,v18.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v4.16b},[x1],#16
orr v0.16b,v2.16b,v2.16b
st1 {v5.16b},[x1],#16
orr v1.16b,v3.16b,v3.16b
st1 {v18.16b},[x1],#16
orr v18.16b,v19.16b,v19.16b
b.hs .Loop3x_cbc_dec
cmn x2,#0x30
b.eq .Lcbc_done
nop
.Lcbc_dec_tail:
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Lcbc_dec_tail
aesd v1.16b,v16.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v16.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v17.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v17.16b
aesimc v18.16b,v18.16b
aesd v1.16b,v20.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v20.16b
aesimc v18.16b,v18.16b
cmn x2,#0x20
aesd v1.16b,v21.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v21.16b
aesimc v18.16b,v18.16b
eor v5.16b,v6.16b,v7.16b
aesd v1.16b,v22.16b
aesimc v1.16b,v1.16b
aesd v18.16b,v22.16b
aesimc v18.16b,v18.16b
eor v17.16b,v3.16b,v7.16b
aesd v1.16b,v23.16b
aesd v18.16b,v23.16b
b.eq .Lcbc_dec_one
eor v5.16b,v5.16b,v1.16b
eor v17.16b,v17.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
st1 {v17.16b},[x1],#16
b .Lcbc_done
.Lcbc_dec_one:
eor v5.16b,v5.16b,v18.16b
orr v6.16b,v19.16b,v19.16b
st1 {v5.16b},[x1],#16
.Lcbc_done:
st1 {v6.16b},[x4]
.Lcbc_abort:
ldr x29,[sp],#16
ret
.size aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
.globl aes_hw_ctr32_encrypt_blocks
.hidden aes_hw_ctr32_encrypt_blocks
.type aes_hw_ctr32_encrypt_blocks,%function
.align 5
aes_hw_ctr32_encrypt_blocks:
stp x29,x30,[sp,#-16]!
add x29,sp,#0
ldr w5,[x3,#240]
ldr w8, [x4, #12]
ld1 {v0.4s},[x4]
ld1 {v16.4s,v17.4s},[x3] // load key schedule...
sub w5,w5,#4
mov x12,#16
cmp x2,#2
add x7,x3,x5,lsl#4 // pointer to last 5 round keys
sub w5,w5,#2
ld1 {v20.4s,v21.4s},[x7],#32
ld1 {v22.4s,v23.4s},[x7],#32
ld1 {v7.4s},[x7]
add x7,x3,#32
mov w6,w5
csel x12,xzr,x12,lo
#ifndef __ARMEB__
rev w8, w8
#endif
orr v1.16b,v0.16b,v0.16b
add w10, w8, #1
orr v18.16b,v0.16b,v0.16b
add w8, w8, #2
orr v6.16b,v0.16b,v0.16b
rev w10, w10
mov v1.s[3],w10
b.ls .Lctr32_tail
rev w12, w8
sub x2,x2,#3 // bias
mov v18.s[3],w12
b .Loop3x_ctr32
.align 4
.Loop3x_ctr32:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
aese v18.16b,v17.16b
aesmc v18.16b,v18.16b
ld1 {v17.4s},[x7],#16
b.gt .Loop3x_ctr32
aese v0.16b,v16.16b
aesmc v4.16b,v0.16b
aese v1.16b,v16.16b
aesmc v5.16b,v1.16b
ld1 {v2.16b},[x0],#16
orr v0.16b,v6.16b,v6.16b
aese v18.16b,v16.16b
aesmc v18.16b,v18.16b
ld1 {v3.16b},[x0],#16
orr v1.16b,v6.16b,v6.16b
aese v4.16b,v17.16b
aesmc v4.16b,v4.16b
aese v5.16b,v17.16b
aesmc v5.16b,v5.16b
ld1 {v19.16b},[x0],#16
mov x7,x3
aese v18.16b,v17.16b
aesmc v17.16b,v18.16b
orr v18.16b,v6.16b,v6.16b
add w9,w8,#1
aese v4.16b,v20.16b
aesmc v4.16b,v4.16b
aese v5.16b,v20.16b
aesmc v5.16b,v5.16b
eor v2.16b,v2.16b,v7.16b
add w10,w8,#2
aese v17.16b,v20.16b
aesmc v17.16b,v17.16b
eor v3.16b,v3.16b,v7.16b
add w8,w8,#3
aese v4.16b,v21.16b
aesmc v4.16b,v4.16b
aese v5.16b,v21.16b
aesmc v5.16b,v5.16b
eor v19.16b,v19.16b,v7.16b
rev w9,w9
aese v17.16b,v21.16b
aesmc v17.16b,v17.16b
mov v0.s[3], w9
rev w10,w10
aese v4.16b,v22.16b
aesmc v4.16b,v4.16b
aese v5.16b,v22.16b
aesmc v5.16b,v5.16b
mov v1.s[3], w10
rev w12,w8
aese v17.16b,v22.16b
aesmc v17.16b,v17.16b
mov v18.s[3], w12
subs x2,x2,#3
aese v4.16b,v23.16b
aese v5.16b,v23.16b
aese v17.16b,v23.16b
eor v2.16b,v2.16b,v4.16b
ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
st1 {v2.16b},[x1],#16
eor v3.16b,v3.16b,v5.16b
mov w6,w5
st1 {v3.16b},[x1],#16
eor v19.16b,v19.16b,v17.16b
ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
st1 {v19.16b},[x1],#16
b.hs .Loop3x_ctr32
adds x2,x2,#3
b.eq .Lctr32_done
cmp x2,#1
mov x12,#16
csel x12,xzr,x12,eq
.Lctr32_tail:
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
ld1 {v16.4s},[x7],#16
subs w6,w6,#2
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v17.4s},[x7],#16
b.gt .Lctr32_tail
aese v0.16b,v16.16b
aesmc v0.16b,v0.16b
aese v1.16b,v16.16b
aesmc v1.16b,v1.16b
aese v0.16b,v17.16b
aesmc v0.16b,v0.16b
aese v1.16b,v17.16b
aesmc v1.16b,v1.16b
ld1 {v2.16b},[x0],x12
aese v0.16b,v20.16b
aesmc v0.16b,v0.16b
aese v1.16b,v20.16b
aesmc v1.16b,v1.16b
ld1 {v3.16b},[x0]
aese v0.16b,v21.16b
aesmc v0.16b,v0.16b
aese v1.16b,v21.16b
aesmc v1.16b,v1.16b
eor v2.16b,v2.16b,v7.16b
aese v0.16b,v22.16b
aesmc v0.16b,v0.16b
aese v1.16b,v22.16b
aesmc v1.16b,v1.16b
eor v3.16b,v3.16b,v7.16b
aese v0.16b,v23.16b
aese v1.16b,v23.16b
cmp x2,#1
eor v2.16b,v2.16b,v0.16b
eor v3.16b,v3.16b,v1.16b
st1 {v2.16b},[x1],#16
b.eq .Lctr32_done
st1 {v3.16b},[x1]
.Lctr32_done:
ldr x29,[sp],#16
ret
.size aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__aarch64__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,989 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__APPLE__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.align 5
LOPENSSL_armcap:
.word OPENSSL_armcap_P-Lbn_mul_mont
#endif
.globl _bn_mul_mont
.private_extern _bn_mul_mont
#ifdef __thumb2__
.thumb_func _bn_mul_mont
#endif
.align 5
_bn_mul_mont:
Lbn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne Lialu
adr r0,Lbn_mul_mont
ldr r2,LOPENSSL_armcap
ldr r0,[r0,r2]
#ifdef __APPLE__
ldr r0,[r0]
#endif
tst r0,#ARMV7_NEON @ NEON available?
ldmia sp, {r0,r2}
beq Lialu
add sp,sp,#8
b bn_mul8x_mont_neon
.align 4
Lialu:
#endif
cmp ip,#2
mov r0,ip @ load num
#ifdef __thumb2__
ittt lt
#endif
movlt r0,#0
addlt sp,sp,#2*4
blt Labrt
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
mov r0,r0,lsl#2 @ rescale r0 for byte count
sub sp,sp,r0 @ alloca(4*num)
sub sp,sp,#4 @ +extra dword
sub r0,r0,#4 @ "num=num-1"
add r4,r2,r0 @ &bp[num-1]
add r0,sp,r0 @ r0 to point at &tp[num-1]
ldr r8,[r0,#14*4] @ &n0
ldr r2,[r2] @ bp[0]
ldr r5,[r1],#4 @ ap[0],ap++
ldr r6,[r3],#4 @ np[0],np++
ldr r8,[r8] @ *n0
str r4,[r0,#15*4] @ save &bp[num]
umull r10,r11,r5,r2 @ ap[0]*bp[0]
str r8,[r0,#14*4] @ save n0 value
mul r8,r10,r8 @ "tp[0]"*n0
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
mov r4,sp
L1st:
ldr r5,[r1],#4 @ ap[j],ap++
mov r10,r11
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne L1st
adds r12,r12,r11
ldr r4,[r0,#13*4] @ restore bp
mov r14,#0
ldr r8,[r0,#14*4] @ restore n0
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
mov r7,sp
str r14,[r0,#4] @ tp[num]=
Louter:
sub r7,r0,r7 @ "original" r0-1 value
sub r1,r1,r7 @ "rewind" ap to &ap[1]
ldr r2,[r4,#4]! @ *(++bp)
sub r3,r3,r7 @ "rewind" np to &np[1]
ldr r5,[r1,#-4] @ ap[0]
ldr r10,[sp] @ tp[0]
ldr r6,[r3,#-4] @ np[0]
ldr r7,[sp,#4] @ tp[1]
mov r11,#0
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
str r4,[r0,#13*4] @ save bp
mul r8,r10,r8
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
mov r4,sp
Linner:
ldr r5,[r1],#4 @ ap[j],ap++
adds r10,r11,r7 @ +=tp[j]
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adc r11,r11,#0
ldr r7,[r4,#8] @ tp[j+1]
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne Linner
adds r12,r12,r11
mov r14,#0
ldr r4,[r0,#13*4] @ restore bp
adc r14,r14,#0
ldr r8,[r0,#14*4] @ restore n0
adds r12,r12,r7
ldr r7,[r0,#15*4] @ restore &bp[num]
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
str r14,[r0,#4] @ tp[num]=
cmp r4,r7
#ifdef __thumb2__
itt ne
#endif
movne r7,sp
bne Louter
ldr r2,[r0,#12*4] @ pull rp
mov r5,sp
add r0,r0,#4 @ r0 to point at &tp[num]
sub r5,r0,r5 @ "original" num value
mov r4,sp @ "rewind" r4
mov r1,r4 @ "borrow" r1
sub r3,r3,r5 @ "rewind" r3 to &np[0]
subs r7,r7,r7 @ "clear" carry flag
Lsub: ldr r7,[r4],#4
ldr r6,[r3],#4
sbcs r7,r7,r6 @ tp[j]-np[j]
str r7,[r2],#4 @ rp[j]=
teq r4,r0 @ preserve carry
bne Lsub
sbcs r14,r14,#0 @ upmost carry
mov r4,sp @ "rewind" r4
sub r2,r2,r5 @ "rewind" r2
Lcopy: ldr r7,[r4] @ conditional copy
ldr r5,[r2]
str sp,[r4],#4 @ zap tp
#ifdef __thumb2__
it cc
#endif
movcc r5,r7
str r5,[r2],#4
teq r4,r0 @ preserve carry
bne Lcopy
mov sp,r0
add sp,sp,#4 @ skip over tp[num+1]
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
Labrt:
#if __ARM_ARCH__>=5
bx lr @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
#if __ARM_MAX_ARCH__>=7
#ifdef __thumb2__
.thumb_func bn_mul8x_mont_neon
#endif
.align 5
bn_mul8x_mont_neon:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load rest of parameter block
mov ip,sp
cmp r5,#8
bhi LNEON_8n
@ special case for r5==8, everything is in register bank...
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
sub r7,sp,r5,lsl#4
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
and r7,r7,#-64
vld1.32 {d30[0]}, [r4,:32]
mov sp,r7 @ alloca
vzip.16 d28,d8
vmull.u32 q6,d28,d0[0]
vmull.u32 q7,d28,d0[1]
vmull.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmull.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
vmul.u32 d29,d29,d30
vmull.u32 q10,d28,d2[0]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmull.u32 q11,d28,d2[1]
vmull.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmull.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
sub r9,r5,#1
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
b LNEON_outer8
.align 4
LNEON_outer8:
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
vzip.16 d28,d8
vadd.u64 d12,d12,d10
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
subs r9,r9,#1
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
bne LNEON_outer8
vadd.u64 d12,d12,d10
mov r7,sp
vshr.u64 d10,d12,#16
mov r8,r5
vadd.u64 d13,d13,d10
add r6,sp,#96
vshr.u64 d10,d13,#16
vzip.16 d12,d13
b LNEON_tail_entry
.align 4
LNEON_8n:
veor q6,q6,q6
sub r7,sp,#128
veor q7,q7,q7
sub r7,r7,r5,lsl#4
veor q8,q8,q8
and r7,r7,#-64
veor q9,q9,q9
mov sp,r7 @ alloca
veor q10,q10,q10
add r7,r7,#256
veor q11,q11,q11
sub r8,r5,#8
veor q12,q12,q12
veor q13,q13,q13
LNEON_8n_init:
vst1.64 {q6,q7},[r7,:256]!
subs r8,r8,#8
vst1.64 {q8,q9},[r7,:256]!
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12,q13},[r7,:256]!
bne LNEON_8n_init
add r6,sp,#256
vld1.32 {d0,d1,d2,d3},[r1]!
add r10,sp,#8
vld1.32 {d30[0]},[r4,:32]
mov r9,r5
b LNEON_8n_outer
.align 4
LNEON_8n_outer:
vld1.32 {d28[0]},[r2,:32]! @ *b++
veor d8,d8,d8
vzip.16 d28,d8
add r7,sp,#128
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
vmlal.u32 q10,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q11,d28,d2[1]
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q6,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q7,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q8,d29,d5[0]
vshr.u64 d12,d12,#16
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vadd.u64 d12,d12,d13
vmlal.u32 q11,d29,d6[1]
vshr.u64 d12,d12,#16
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vadd.u64 d14,d14,d12
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]!
vmlal.u32 q8,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q9,d28,d1[0]
vshl.i64 d29,d15,#16
vmlal.u32 q10,d28,d1[1]
vadd.u64 d29,d29,d14
vmlal.u32 q11,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q12,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
vmlal.u32 q13,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q7,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q8,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q9,d29,d5[0]
vshr.u64 d14,d14,#16
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vadd.u64 d14,d14,d15
vmlal.u32 q12,d29,d6[1]
vshr.u64 d14,d14,#16
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vadd.u64 d16,d16,d14
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]!
vmlal.u32 q9,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q10,d28,d1[0]
vshl.i64 d29,d17,#16
vmlal.u32 q11,d28,d1[1]
vadd.u64 d29,d29,d16
vmlal.u32 q12,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q13,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
vmlal.u32 q6,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q8,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q9,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q10,d29,d5[0]
vshr.u64 d16,d16,#16
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vadd.u64 d16,d16,d17
vmlal.u32 q13,d29,d6[1]
vshr.u64 d16,d16,#16
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vadd.u64 d18,d18,d16
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]!
vmlal.u32 q10,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q11,d28,d1[0]
vshl.i64 d29,d19,#16
vmlal.u32 q12,d28,d1[1]
vadd.u64 d29,d29,d18
vmlal.u32 q13,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q6,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
vmlal.u32 q7,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q9,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q10,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q11,d29,d5[0]
vshr.u64 d18,d18,#16
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vadd.u64 d18,d18,d19
vmlal.u32 q6,d29,d6[1]
vshr.u64 d18,d18,#16
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vadd.u64 d20,d20,d18
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]!
vmlal.u32 q11,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q12,d28,d1[0]
vshl.i64 d29,d21,#16
vmlal.u32 q13,d28,d1[1]
vadd.u64 d29,d29,d20
vmlal.u32 q6,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q7,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
vmlal.u32 q8,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q10,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q11,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q12,d29,d5[0]
vshr.u64 d20,d20,#16
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vadd.u64 d20,d20,d21
vmlal.u32 q7,d29,d6[1]
vshr.u64 d20,d20,#16
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vadd.u64 d22,d22,d20
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]!
vmlal.u32 q12,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q13,d28,d1[0]
vshl.i64 d29,d23,#16
vmlal.u32 q6,d28,d1[1]
vadd.u64 d29,d29,d22
vmlal.u32 q7,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q8,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
vmlal.u32 q9,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q11,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q12,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q13,d29,d5[0]
vshr.u64 d22,d22,#16
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vadd.u64 d22,d22,d23
vmlal.u32 q8,d29,d6[1]
vshr.u64 d22,d22,#16
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vadd.u64 d24,d24,d22
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]!
vmlal.u32 q13,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q6,d28,d1[0]
vshl.i64 d29,d25,#16
vmlal.u32 q7,d28,d1[1]
vadd.u64 d29,d29,d24
vmlal.u32 q8,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q9,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
vmlal.u32 q10,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q12,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q13,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q6,d29,d5[0]
vshr.u64 d24,d24,#16
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vadd.u64 d24,d24,d25
vmlal.u32 q9,d29,d6[1]
vshr.u64 d24,d24,#16
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vadd.u64 d26,d26,d24
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]!
vmlal.u32 q6,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q7,d28,d1[0]
vshl.i64 d29,d27,#16
vmlal.u32 q8,d28,d1[1]
vadd.u64 d29,d29,d26
vmlal.u32 q9,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
vmlal.u32 q11,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q12,d28,d3[1]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q13,d29,d4[0]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q6,d29,d4[1]
vmlal.u32 q7,d29,d5[0]
vshr.u64 d26,d26,#16
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vadd.u64 d26,d26,d27
vmlal.u32 q10,d29,d6[1]
vshr.u64 d26,d26,#16
vmlal.u32 q11,d29,d7[0]
vmlal.u32 q12,d29,d7[1]
vadd.u64 d12,d12,d26
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
add r10,sp,#8 @ rewind
sub r8,r5,#8
b LNEON_8n_inner
.align 4
LNEON_8n_inner:
subs r8,r8,#8
vmlal.u32 q6,d28,d0[0]
vld1.64 {q13},[r6,:128]
vmlal.u32 q7,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
vmlal.u32 q8,d28,d1[0]
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q9,d28,d1[1]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmlal.u32 q11,d29,d6[1]
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vst1.64 {q6},[r7,:128]!
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]
vmlal.u32 q8,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
vmlal.u32 q9,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d1[1]
vmlal.u32 q11,d28,d2[0]
vmlal.u32 q12,d28,d2[1]
vmlal.u32 q13,d28,d3[0]
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
vmlal.u32 q7,d29,d4[0]
vmlal.u32 q8,d29,d4[1]
vmlal.u32 q9,d29,d5[0]
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vmlal.u32 q12,d29,d6[1]
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vst1.64 {q7},[r7,:128]!
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]
vmlal.u32 q9,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
vmlal.u32 q10,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q11,d28,d1[1]
vmlal.u32 q12,d28,d2[0]
vmlal.u32 q13,d28,d2[1]
vmlal.u32 q6,d28,d3[0]
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
vmlal.u32 q8,d29,d4[0]
vmlal.u32 q9,d29,d4[1]
vmlal.u32 q10,d29,d5[0]
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vmlal.u32 q13,d29,d6[1]
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vst1.64 {q8},[r7,:128]!
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]
vmlal.u32 q10,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
vmlal.u32 q11,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q12,d28,d1[1]
vmlal.u32 q13,d28,d2[0]
vmlal.u32 q6,d28,d2[1]
vmlal.u32 q7,d28,d3[0]
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
vmlal.u32 q9,d29,d4[0]
vmlal.u32 q10,d29,d4[1]
vmlal.u32 q11,d29,d5[0]
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vmlal.u32 q6,d29,d6[1]
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vst1.64 {q9},[r7,:128]!
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]
vmlal.u32 q11,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
vmlal.u32 q12,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q13,d28,d1[1]
vmlal.u32 q6,d28,d2[0]
vmlal.u32 q7,d28,d2[1]
vmlal.u32 q8,d28,d3[0]
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
vmlal.u32 q10,d29,d4[0]
vmlal.u32 q11,d29,d4[1]
vmlal.u32 q12,d29,d5[0]
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vmlal.u32 q7,d29,d6[1]
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vst1.64 {q10},[r7,:128]!
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]
vmlal.u32 q12,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
vmlal.u32 q13,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q6,d28,d1[1]
vmlal.u32 q7,d28,d2[0]
vmlal.u32 q8,d28,d2[1]
vmlal.u32 q9,d28,d3[0]
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
vmlal.u32 q11,d29,d4[0]
vmlal.u32 q12,d29,d4[1]
vmlal.u32 q13,d29,d5[0]
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vmlal.u32 q8,d29,d6[1]
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vst1.64 {q11},[r7,:128]!
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]
vmlal.u32 q13,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
vmlal.u32 q6,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q7,d28,d1[1]
vmlal.u32 q8,d28,d2[0]
vmlal.u32 q9,d28,d2[1]
vmlal.u32 q10,d28,d3[0]
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
vmlal.u32 q12,d29,d4[0]
vmlal.u32 q13,d29,d4[1]
vmlal.u32 q6,d29,d5[0]
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vmlal.u32 q9,d29,d6[1]
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vst1.64 {q12},[r7,:128]!
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]
vmlal.u32 q6,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
vmlal.u32 q7,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q8,d28,d1[1]
vmlal.u32 q9,d28,d2[0]
vmlal.u32 q10,d28,d2[1]
vmlal.u32 q11,d28,d3[0]
vmlal.u32 q12,d28,d3[1]
it eq
subeq r1,r1,r5,lsl#2 @ rewind
vmlal.u32 q13,d29,d4[0]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q6,d29,d4[1]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q7,d29,d5[0]
add r10,sp,#8 @ rewind
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vmlal.u32 q10,d29,d6[1]
vmlal.u32 q11,d29,d7[0]
vst1.64 {q13},[r7,:128]!
vmlal.u32 q12,d29,d7[1]
bne LNEON_8n_inner
add r6,sp,#128
vst1.64 {q6,q7},[r7,:256]!
veor q2,q2,q2 @ d4-d5
vst1.64 {q8,q9},[r7,:256]!
veor q3,q3,q3 @ d6-d7
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12},[r7,:128]
subs r9,r9,#8
vld1.64 {q6,q7},[r6,:256]!
vld1.64 {q8,q9},[r6,:256]!
vld1.64 {q10,q11},[r6,:256]!
vld1.64 {q12,q13},[r6,:256]!
itt ne
subne r3,r3,r5,lsl#2 @ rewind
bne LNEON_8n_outer
add r7,sp,#128
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
vshr.u64 d10,d12,#16
vst1.64 {q2,q3},[sp,:256]!
vadd.u64 d13,d13,d10
vst1.64 {q2,q3}, [sp,:256]!
vshr.u64 d10,d13,#16
vst1.64 {q2,q3}, [sp,:256]!
vzip.16 d12,d13
mov r8,r5
b LNEON_tail_entry
.align 4
LNEON_tail:
vadd.u64 d12,d12,d10
vshr.u64 d10,d12,#16
vld1.64 {q8,q9}, [r6, :256]!
vadd.u64 d13,d13,d10
vld1.64 {q10,q11}, [r6, :256]!
vshr.u64 d10,d13,#16
vld1.64 {q12,q13}, [r6, :256]!
vzip.16 d12,d13
LNEON_tail_entry:
vadd.u64 d14,d14,d10
vst1.32 {d12[0]}, [r7, :32]!
vshr.u64 d10,d14,#16
vadd.u64 d15,d15,d10
vshr.u64 d10,d15,#16
vzip.16 d14,d15
vadd.u64 d16,d16,d10
vst1.32 {d14[0]}, [r7, :32]!
vshr.u64 d10,d16,#16
vadd.u64 d17,d17,d10
vshr.u64 d10,d17,#16
vzip.16 d16,d17
vadd.u64 d18,d18,d10
vst1.32 {d16[0]}, [r7, :32]!
vshr.u64 d10,d18,#16
vadd.u64 d19,d19,d10
vshr.u64 d10,d19,#16
vzip.16 d18,d19
vadd.u64 d20,d20,d10
vst1.32 {d18[0]}, [r7, :32]!
vshr.u64 d10,d20,#16
vadd.u64 d21,d21,d10
vshr.u64 d10,d21,#16
vzip.16 d20,d21
vadd.u64 d22,d22,d10
vst1.32 {d20[0]}, [r7, :32]!
vshr.u64 d10,d22,#16
vadd.u64 d23,d23,d10
vshr.u64 d10,d23,#16
vzip.16 d22,d23
vadd.u64 d24,d24,d10
vst1.32 {d22[0]}, [r7, :32]!
vshr.u64 d10,d24,#16
vadd.u64 d25,d25,d10
vshr.u64 d10,d25,#16
vzip.16 d24,d25
vadd.u64 d26,d26,d10
vst1.32 {d24[0]}, [r7, :32]!
vshr.u64 d10,d26,#16
vadd.u64 d27,d27,d10
vshr.u64 d10,d27,#16
vzip.16 d26,d27
vld1.64 {q6,q7}, [r6, :256]!
subs r8,r8,#8
vst1.32 {d26[0]}, [r7, :32]!
bne LNEON_tail
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
sub r3,r3,r5,lsl#2 @ rewind r3
subs r1,sp,#0 @ clear carry flag
add r2,sp,r5,lsl#2
LNEON_sub:
ldmia r1!, {r4,r5,r6,r7}
ldmia r3!, {r8,r9,r10,r11}
sbcs r8, r4,r8
sbcs r9, r5,r9
sbcs r10,r6,r10
sbcs r11,r7,r11
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne LNEON_sub
ldr r10, [r1] @ load top-most bit
mov r11,sp
veor q0,q0,q0
sub r11,r2,r11 @ this is num*4
veor q1,q1,q1
mov r1,sp
sub r0,r0,r11 @ rewind r0
mov r3,r2 @ second 3/4th of frame
sbcs r10,r10,#0 @ result is carry flag
LNEON_copy_n_zap:
ldmia r1!, {r4,r5,r6,r7}
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r3,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
ldmia r1, {r4,r5,r6,r7}
stmia r0!, {r8,r9,r10,r11}
sub r1,r1,#16
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r1,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne LNEON_copy_n_zap
mov sp,ip
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
bx lr @ bx lr
#endif
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7
.comm _OPENSSL_armcap_P,4
.non_lazy_symbol_pointer
OPENSSL_armcap_P:
.indirect_symbol _OPENSSL_armcap_P
.long 0
.private_extern _OPENSSL_armcap_P
#endif
#endif // !OPENSSL_NO_ASM
#endif // defined(__arm__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@@ -0,0 +1,984 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__linux__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions.
.arch armv7-a
.text
#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-.Lbn_mul_mont
#endif
.globl bn_mul_mont
.hidden bn_mul_mont
.type bn_mul_mont,%function
.align 5
bn_mul_mont:
.Lbn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne .Lialu
adr r0,.Lbn_mul_mont
ldr r2,.LOPENSSL_armcap
ldr r0,[r0,r2]
#ifdef __APPLE__
ldr r0,[r0]
#endif
tst r0,#ARMV7_NEON @ NEON available?
ldmia sp, {r0,r2}
beq .Lialu
add sp,sp,#8
b bn_mul8x_mont_neon
.align 4
.Lialu:
#endif
cmp ip,#2
mov r0,ip @ load num
#ifdef __thumb2__
ittt lt
#endif
movlt r0,#0
addlt sp,sp,#2*4
blt .Labrt
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ save 10 registers
mov r0,r0,lsl#2 @ rescale r0 for byte count
sub sp,sp,r0 @ alloca(4*num)
sub sp,sp,#4 @ +extra dword
sub r0,r0,#4 @ "num=num-1"
add r4,r2,r0 @ &bp[num-1]
add r0,sp,r0 @ r0 to point at &tp[num-1]
ldr r8,[r0,#14*4] @ &n0
ldr r2,[r2] @ bp[0]
ldr r5,[r1],#4 @ ap[0],ap++
ldr r6,[r3],#4 @ np[0],np++
ldr r8,[r8] @ *n0
str r4,[r0,#15*4] @ save &bp[num]
umull r10,r11,r5,r2 @ ap[0]*bp[0]
str r8,[r0,#14*4] @ save n0 value
mul r8,r10,r8 @ "tp[0]"*n0
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"t[0]"
mov r4,sp
.L1st:
ldr r5,[r1],#4 @ ap[j],ap++
mov r10,r11
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[0]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne .L1st
adds r12,r12,r11
ldr r4,[r0,#13*4] @ restore bp
mov r14,#0
ldr r8,[r0,#14*4] @ restore n0
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
mov r7,sp
str r14,[r0,#4] @ tp[num]=
.Louter:
sub r7,r0,r7 @ "original" r0-1 value
sub r1,r1,r7 @ "rewind" ap to &ap[1]
ldr r2,[r4,#4]! @ *(++bp)
sub r3,r3,r7 @ "rewind" np to &np[1]
ldr r5,[r1,#-4] @ ap[0]
ldr r10,[sp] @ tp[0]
ldr r6,[r3,#-4] @ np[0]
ldr r7,[sp,#4] @ tp[1]
mov r11,#0
umlal r10,r11,r5,r2 @ ap[0]*bp[i]+tp[0]
str r4,[r0,#13*4] @ save bp
mul r8,r10,r8
mov r12,#0
umlal r10,r12,r6,r8 @ np[0]*n0+"tp[0]"
mov r4,sp
.Linner:
ldr r5,[r1],#4 @ ap[j],ap++
adds r10,r11,r7 @ +=tp[j]
ldr r6,[r3],#4 @ np[j],np++
mov r11,#0
umlal r10,r11,r5,r2 @ ap[j]*bp[i]
mov r14,#0
umlal r12,r14,r6,r8 @ np[j]*n0
adc r11,r11,#0
ldr r7,[r4,#8] @ tp[j+1]
adds r12,r12,r10
str r12,[r4],#4 @ tp[j-1]=,tp++
adc r12,r14,#0
cmp r4,r0
bne .Linner
adds r12,r12,r11
mov r14,#0
ldr r4,[r0,#13*4] @ restore bp
adc r14,r14,#0
ldr r8,[r0,#14*4] @ restore n0
adds r12,r12,r7
ldr r7,[r0,#15*4] @ restore &bp[num]
adc r14,r14,#0
str r12,[r0] @ tp[num-1]=
str r14,[r0,#4] @ tp[num]=
cmp r4,r7
#ifdef __thumb2__
itt ne
#endif
movne r7,sp
bne .Louter
ldr r2,[r0,#12*4] @ pull rp
mov r5,sp
add r0,r0,#4 @ r0 to point at &tp[num]
sub r5,r0,r5 @ "original" num value
mov r4,sp @ "rewind" r4
mov r1,r4 @ "borrow" r1
sub r3,r3,r5 @ "rewind" r3 to &np[0]
subs r7,r7,r7 @ "clear" carry flag
.Lsub: ldr r7,[r4],#4
ldr r6,[r3],#4
sbcs r7,r7,r6 @ tp[j]-np[j]
str r7,[r2],#4 @ rp[j]=
teq r4,r0 @ preserve carry
bne .Lsub
sbcs r14,r14,#0 @ upmost carry
mov r4,sp @ "rewind" r4
sub r2,r2,r5 @ "rewind" r2
.Lcopy: ldr r7,[r4] @ conditional copy
ldr r5,[r2]
str sp,[r4],#4 @ zap tp
#ifdef __thumb2__
it cc
#endif
movcc r5,r7
str r5,[r2],#4
teq r4,r0 @ preserve carry
bne .Lcopy
mov sp,r0
add sp,sp,#4 @ skip over tp[num+1]
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} @ restore registers
add sp,sp,#2*4 @ skip over {r0,r2}
mov r0,#1
.Labrt:
#if __ARM_ARCH__>=5
bx lr @ bx lr
#else
tst lr,#1
moveq pc,lr @ be binary compatible with V4, yet
.word 0xe12fff1e @ interoperable with Thumb ISA:-)
#endif
.size bn_mul_mont,.-bn_mul_mont
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.type bn_mul8x_mont_neon,%function
.align 5
bn_mul8x_mont_neon:
mov ip,sp
stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ ABI specification says so
ldmia ip,{r4,r5} @ load rest of parameter block
mov ip,sp
cmp r5,#8
bhi .LNEON_8n
@ special case for r5==8, everything is in register bank...
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
sub r7,sp,r5,lsl#4
vld1.32 {d0,d1,d2,d3}, [r1]! @ can't specify :32 :-(
and r7,r7,#-64
vld1.32 {d30[0]}, [r4,:32]
mov sp,r7 @ alloca
vzip.16 d28,d8
vmull.u32 q6,d28,d0[0]
vmull.u32 q7,d28,d0[1]
vmull.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmull.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
vmul.u32 d29,d29,d30
vmull.u32 q10,d28,d2[0]
vld1.32 {d4,d5,d6,d7}, [r3]!
vmull.u32 q11,d28,d2[1]
vmull.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmull.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
sub r9,r5,#1
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
b .LNEON_outer8
.align 4
.LNEON_outer8:
vld1.32 {d28[0]}, [r2,:32]!
veor d8,d8,d8
vzip.16 d28,d8
vadd.u64 d12,d12,d10
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
veor d8,d8,d8
subs r9,r9,#1
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmov q5,q6
vmlal.u32 q11,d29,d6[1]
vmov q6,q7
vmlal.u32 q12,d29,d7[0]
vmov q7,q8
vmlal.u32 q13,d29,d7[1]
vmov q8,q9
vmov q9,q10
vshr.u64 d10,d10,#16
vmov q10,q11
vmov q11,q12
vadd.u64 d10,d10,d11
vmov q12,q13
veor q13,q13
vshr.u64 d10,d10,#16
bne .LNEON_outer8
vadd.u64 d12,d12,d10
mov r7,sp
vshr.u64 d10,d12,#16
mov r8,r5
vadd.u64 d13,d13,d10
add r6,sp,#96
vshr.u64 d10,d13,#16
vzip.16 d12,d13
b .LNEON_tail_entry
.align 4
.LNEON_8n:
veor q6,q6,q6
sub r7,sp,#128
veor q7,q7,q7
sub r7,r7,r5,lsl#4
veor q8,q8,q8
and r7,r7,#-64
veor q9,q9,q9
mov sp,r7 @ alloca
veor q10,q10,q10
add r7,r7,#256
veor q11,q11,q11
sub r8,r5,#8
veor q12,q12,q12
veor q13,q13,q13
.LNEON_8n_init:
vst1.64 {q6,q7},[r7,:256]!
subs r8,r8,#8
vst1.64 {q8,q9},[r7,:256]!
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12,q13},[r7,:256]!
bne .LNEON_8n_init
add r6,sp,#256
vld1.32 {d0,d1,d2,d3},[r1]!
add r10,sp,#8
vld1.32 {d30[0]},[r4,:32]
mov r9,r5
b .LNEON_8n_outer
.align 4
.LNEON_8n_outer:
vld1.32 {d28[0]},[r2,:32]! @ *b++
veor d8,d8,d8
vzip.16 d28,d8
add r7,sp,#128
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q6,d28,d0[0]
vmlal.u32 q7,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q8,d28,d1[0]
vshl.i64 d29,d13,#16
vmlal.u32 q9,d28,d1[1]
vadd.u64 d29,d29,d12
vmlal.u32 q10,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q11,d28,d2[1]
vst1.32 {d28},[sp,:64] @ put aside smashed b[8*i+0]
vmlal.u32 q12,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q6,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q7,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q8,d29,d5[0]
vshr.u64 d12,d12,#16
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vadd.u64 d12,d12,d13
vmlal.u32 q11,d29,d6[1]
vshr.u64 d12,d12,#16
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vadd.u64 d14,d14,d12
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+0]
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]!
vmlal.u32 q8,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q9,d28,d1[0]
vshl.i64 d29,d15,#16
vmlal.u32 q10,d28,d1[1]
vadd.u64 d29,d29,d14
vmlal.u32 q11,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q12,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+1]
vmlal.u32 q13,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q7,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q8,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q9,d29,d5[0]
vshr.u64 d14,d14,#16
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vadd.u64 d14,d14,d15
vmlal.u32 q12,d29,d6[1]
vshr.u64 d14,d14,#16
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vadd.u64 d16,d16,d14
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+1]
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]!
vmlal.u32 q9,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q10,d28,d1[0]
vshl.i64 d29,d17,#16
vmlal.u32 q11,d28,d1[1]
vadd.u64 d29,d29,d16
vmlal.u32 q12,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q13,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+2]
vmlal.u32 q6,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q8,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q9,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q10,d29,d5[0]
vshr.u64 d16,d16,#16
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vadd.u64 d16,d16,d17
vmlal.u32 q13,d29,d6[1]
vshr.u64 d16,d16,#16
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vadd.u64 d18,d18,d16
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+2]
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]!
vmlal.u32 q10,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q11,d28,d1[0]
vshl.i64 d29,d19,#16
vmlal.u32 q12,d28,d1[1]
vadd.u64 d29,d29,d18
vmlal.u32 q13,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q6,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+3]
vmlal.u32 q7,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q9,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q10,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q11,d29,d5[0]
vshr.u64 d18,d18,#16
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vadd.u64 d18,d18,d19
vmlal.u32 q6,d29,d6[1]
vshr.u64 d18,d18,#16
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vadd.u64 d20,d20,d18
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+3]
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]!
vmlal.u32 q11,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q12,d28,d1[0]
vshl.i64 d29,d21,#16
vmlal.u32 q13,d28,d1[1]
vadd.u64 d29,d29,d20
vmlal.u32 q6,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q7,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+4]
vmlal.u32 q8,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q10,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q11,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q12,d29,d5[0]
vshr.u64 d20,d20,#16
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vadd.u64 d20,d20,d21
vmlal.u32 q7,d29,d6[1]
vshr.u64 d20,d20,#16
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vadd.u64 d22,d22,d20
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+4]
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]!
vmlal.u32 q12,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q13,d28,d1[0]
vshl.i64 d29,d23,#16
vmlal.u32 q6,d28,d1[1]
vadd.u64 d29,d29,d22
vmlal.u32 q7,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q8,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+5]
vmlal.u32 q9,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q11,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q12,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q13,d29,d5[0]
vshr.u64 d22,d22,#16
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vadd.u64 d22,d22,d23
vmlal.u32 q8,d29,d6[1]
vshr.u64 d22,d22,#16
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vadd.u64 d24,d24,d22
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+5]
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]!
vmlal.u32 q13,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q6,d28,d1[0]
vshl.i64 d29,d25,#16
vmlal.u32 q7,d28,d1[1]
vadd.u64 d29,d29,d24
vmlal.u32 q8,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q9,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+6]
vmlal.u32 q10,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28[0]},[r2,:32]! @ *b++
vmlal.u32 q12,d29,d4[0]
veor d10,d10,d10
vmlal.u32 q13,d29,d4[1]
vzip.16 d28,d10
vmlal.u32 q6,d29,d5[0]
vshr.u64 d24,d24,#16
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vadd.u64 d24,d24,d25
vmlal.u32 q9,d29,d6[1]
vshr.u64 d24,d24,#16
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vadd.u64 d26,d26,d24
vst1.32 {d29},[r10,:64]! @ put aside smashed m[8*i+6]
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]!
vmlal.u32 q6,d28,d0[1]
veor d8,d8,d8
vmlal.u32 q7,d28,d1[0]
vshl.i64 d29,d27,#16
vmlal.u32 q8,d28,d1[1]
vadd.u64 d29,d29,d26
vmlal.u32 q9,d28,d2[0]
vmul.u32 d29,d29,d30
vmlal.u32 q10,d28,d2[1]
vst1.32 {d28},[r10,:64]! @ put aside smashed b[8*i+7]
vmlal.u32 q11,d28,d3[0]
vzip.16 d29,d8
vmlal.u32 q12,d28,d3[1]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q13,d29,d4[0]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q6,d29,d4[1]
vmlal.u32 q7,d29,d5[0]
vshr.u64 d26,d26,#16
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vadd.u64 d26,d26,d27
vmlal.u32 q10,d29,d6[1]
vshr.u64 d26,d26,#16
vmlal.u32 q11,d29,d7[0]
vmlal.u32 q12,d29,d7[1]
vadd.u64 d12,d12,d26
vst1.32 {d29},[r10,:64] @ put aside smashed m[8*i+7]
add r10,sp,#8 @ rewind
sub r8,r5,#8
b .LNEON_8n_inner
.align 4
.LNEON_8n_inner:
subs r8,r8,#8
vmlal.u32 q6,d28,d0[0]
vld1.64 {q13},[r6,:128]
vmlal.u32 q7,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+0]
vmlal.u32 q8,d28,d1[0]
vld1.32 {d4,d5,d6,d7},[r3]!
vmlal.u32 q9,d28,d1[1]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d2[0]
vmlal.u32 q11,d28,d2[1]
vmlal.u32 q12,d28,d3[0]
vmlal.u32 q13,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+1]
vmlal.u32 q6,d29,d4[0]
vmlal.u32 q7,d29,d4[1]
vmlal.u32 q8,d29,d5[0]
vmlal.u32 q9,d29,d5[1]
vmlal.u32 q10,d29,d6[0]
vmlal.u32 q11,d29,d6[1]
vmlal.u32 q12,d29,d7[0]
vmlal.u32 q13,d29,d7[1]
vst1.64 {q6},[r7,:128]!
vmlal.u32 q7,d28,d0[0]
vld1.64 {q6},[r6,:128]
vmlal.u32 q8,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+1]
vmlal.u32 q9,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q10,d28,d1[1]
vmlal.u32 q11,d28,d2[0]
vmlal.u32 q12,d28,d2[1]
vmlal.u32 q13,d28,d3[0]
vmlal.u32 q6,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+2]
vmlal.u32 q7,d29,d4[0]
vmlal.u32 q8,d29,d4[1]
vmlal.u32 q9,d29,d5[0]
vmlal.u32 q10,d29,d5[1]
vmlal.u32 q11,d29,d6[0]
vmlal.u32 q12,d29,d6[1]
vmlal.u32 q13,d29,d7[0]
vmlal.u32 q6,d29,d7[1]
vst1.64 {q7},[r7,:128]!
vmlal.u32 q8,d28,d0[0]
vld1.64 {q7},[r6,:128]
vmlal.u32 q9,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+2]
vmlal.u32 q10,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q11,d28,d1[1]
vmlal.u32 q12,d28,d2[0]
vmlal.u32 q13,d28,d2[1]
vmlal.u32 q6,d28,d3[0]
vmlal.u32 q7,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+3]
vmlal.u32 q8,d29,d4[0]
vmlal.u32 q9,d29,d4[1]
vmlal.u32 q10,d29,d5[0]
vmlal.u32 q11,d29,d5[1]
vmlal.u32 q12,d29,d6[0]
vmlal.u32 q13,d29,d6[1]
vmlal.u32 q6,d29,d7[0]
vmlal.u32 q7,d29,d7[1]
vst1.64 {q8},[r7,:128]!
vmlal.u32 q9,d28,d0[0]
vld1.64 {q8},[r6,:128]
vmlal.u32 q10,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+3]
vmlal.u32 q11,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q12,d28,d1[1]
vmlal.u32 q13,d28,d2[0]
vmlal.u32 q6,d28,d2[1]
vmlal.u32 q7,d28,d3[0]
vmlal.u32 q8,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+4]
vmlal.u32 q9,d29,d4[0]
vmlal.u32 q10,d29,d4[1]
vmlal.u32 q11,d29,d5[0]
vmlal.u32 q12,d29,d5[1]
vmlal.u32 q13,d29,d6[0]
vmlal.u32 q6,d29,d6[1]
vmlal.u32 q7,d29,d7[0]
vmlal.u32 q8,d29,d7[1]
vst1.64 {q9},[r7,:128]!
vmlal.u32 q10,d28,d0[0]
vld1.64 {q9},[r6,:128]
vmlal.u32 q11,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+4]
vmlal.u32 q12,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q13,d28,d1[1]
vmlal.u32 q6,d28,d2[0]
vmlal.u32 q7,d28,d2[1]
vmlal.u32 q8,d28,d3[0]
vmlal.u32 q9,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+5]
vmlal.u32 q10,d29,d4[0]
vmlal.u32 q11,d29,d4[1]
vmlal.u32 q12,d29,d5[0]
vmlal.u32 q13,d29,d5[1]
vmlal.u32 q6,d29,d6[0]
vmlal.u32 q7,d29,d6[1]
vmlal.u32 q8,d29,d7[0]
vmlal.u32 q9,d29,d7[1]
vst1.64 {q10},[r7,:128]!
vmlal.u32 q11,d28,d0[0]
vld1.64 {q10},[r6,:128]
vmlal.u32 q12,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+5]
vmlal.u32 q13,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q6,d28,d1[1]
vmlal.u32 q7,d28,d2[0]
vmlal.u32 q8,d28,d2[1]
vmlal.u32 q9,d28,d3[0]
vmlal.u32 q10,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+6]
vmlal.u32 q11,d29,d4[0]
vmlal.u32 q12,d29,d4[1]
vmlal.u32 q13,d29,d5[0]
vmlal.u32 q6,d29,d5[1]
vmlal.u32 q7,d29,d6[0]
vmlal.u32 q8,d29,d6[1]
vmlal.u32 q9,d29,d7[0]
vmlal.u32 q10,d29,d7[1]
vst1.64 {q11},[r7,:128]!
vmlal.u32 q12,d28,d0[0]
vld1.64 {q11},[r6,:128]
vmlal.u32 q13,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+6]
vmlal.u32 q6,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q7,d28,d1[1]
vmlal.u32 q8,d28,d2[0]
vmlal.u32 q9,d28,d2[1]
vmlal.u32 q10,d28,d3[0]
vmlal.u32 q11,d28,d3[1]
vld1.32 {d28},[r10,:64]! @ pull smashed b[8*i+7]
vmlal.u32 q12,d29,d4[0]
vmlal.u32 q13,d29,d4[1]
vmlal.u32 q6,d29,d5[0]
vmlal.u32 q7,d29,d5[1]
vmlal.u32 q8,d29,d6[0]
vmlal.u32 q9,d29,d6[1]
vmlal.u32 q10,d29,d7[0]
vmlal.u32 q11,d29,d7[1]
vst1.64 {q12},[r7,:128]!
vmlal.u32 q13,d28,d0[0]
vld1.64 {q12},[r6,:128]
vmlal.u32 q6,d28,d0[1]
vld1.32 {d29},[r10,:64]! @ pull smashed m[8*i+7]
vmlal.u32 q7,d28,d1[0]
it ne
addne r6,r6,#16 @ don't advance in last iteration
vmlal.u32 q8,d28,d1[1]
vmlal.u32 q9,d28,d2[0]
vmlal.u32 q10,d28,d2[1]
vmlal.u32 q11,d28,d3[0]
vmlal.u32 q12,d28,d3[1]
it eq
subeq r1,r1,r5,lsl#2 @ rewind
vmlal.u32 q13,d29,d4[0]
vld1.32 {d28},[sp,:64] @ pull smashed b[8*i+0]
vmlal.u32 q6,d29,d4[1]
vld1.32 {d0,d1,d2,d3},[r1]!
vmlal.u32 q7,d29,d5[0]
add r10,sp,#8 @ rewind
vmlal.u32 q8,d29,d5[1]
vmlal.u32 q9,d29,d6[0]
vmlal.u32 q10,d29,d6[1]
vmlal.u32 q11,d29,d7[0]
vst1.64 {q13},[r7,:128]!
vmlal.u32 q12,d29,d7[1]
bne .LNEON_8n_inner
add r6,sp,#128
vst1.64 {q6,q7},[r7,:256]!
veor q2,q2,q2 @ d4-d5
vst1.64 {q8,q9},[r7,:256]!
veor q3,q3,q3 @ d6-d7
vst1.64 {q10,q11},[r7,:256]!
vst1.64 {q12},[r7,:128]
subs r9,r9,#8
vld1.64 {q6,q7},[r6,:256]!
vld1.64 {q8,q9},[r6,:256]!
vld1.64 {q10,q11},[r6,:256]!
vld1.64 {q12,q13},[r6,:256]!
itt ne
subne r3,r3,r5,lsl#2 @ rewind
bne .LNEON_8n_outer
add r7,sp,#128
vst1.64 {q2,q3}, [sp,:256]! @ start wiping stack frame
vshr.u64 d10,d12,#16
vst1.64 {q2,q3},[sp,:256]!
vadd.u64 d13,d13,d10
vst1.64 {q2,q3}, [sp,:256]!
vshr.u64 d10,d13,#16
vst1.64 {q2,q3}, [sp,:256]!
vzip.16 d12,d13
mov r8,r5
b .LNEON_tail_entry
.align 4
.LNEON_tail:
vadd.u64 d12,d12,d10
vshr.u64 d10,d12,#16
vld1.64 {q8,q9}, [r6, :256]!
vadd.u64 d13,d13,d10
vld1.64 {q10,q11}, [r6, :256]!
vshr.u64 d10,d13,#16
vld1.64 {q12,q13}, [r6, :256]!
vzip.16 d12,d13
.LNEON_tail_entry:
vadd.u64 d14,d14,d10
vst1.32 {d12[0]}, [r7, :32]!
vshr.u64 d10,d14,#16
vadd.u64 d15,d15,d10
vshr.u64 d10,d15,#16
vzip.16 d14,d15
vadd.u64 d16,d16,d10
vst1.32 {d14[0]}, [r7, :32]!
vshr.u64 d10,d16,#16
vadd.u64 d17,d17,d10
vshr.u64 d10,d17,#16
vzip.16 d16,d17
vadd.u64 d18,d18,d10
vst1.32 {d16[0]}, [r7, :32]!
vshr.u64 d10,d18,#16
vadd.u64 d19,d19,d10
vshr.u64 d10,d19,#16
vzip.16 d18,d19
vadd.u64 d20,d20,d10
vst1.32 {d18[0]}, [r7, :32]!
vshr.u64 d10,d20,#16
vadd.u64 d21,d21,d10
vshr.u64 d10,d21,#16
vzip.16 d20,d21
vadd.u64 d22,d22,d10
vst1.32 {d20[0]}, [r7, :32]!
vshr.u64 d10,d22,#16
vadd.u64 d23,d23,d10
vshr.u64 d10,d23,#16
vzip.16 d22,d23
vadd.u64 d24,d24,d10
vst1.32 {d22[0]}, [r7, :32]!
vshr.u64 d10,d24,#16
vadd.u64 d25,d25,d10
vshr.u64 d10,d25,#16
vzip.16 d24,d25
vadd.u64 d26,d26,d10
vst1.32 {d24[0]}, [r7, :32]!
vshr.u64 d10,d26,#16
vadd.u64 d27,d27,d10
vshr.u64 d10,d27,#16
vzip.16 d26,d27
vld1.64 {q6,q7}, [r6, :256]!
subs r8,r8,#8
vst1.32 {d26[0]}, [r7, :32]!
bne .LNEON_tail
vst1.32 {d10[0]}, [r7, :32] @ top-most bit
sub r3,r3,r5,lsl#2 @ rewind r3
subs r1,sp,#0 @ clear carry flag
add r2,sp,r5,lsl#2
.LNEON_sub:
ldmia r1!, {r4,r5,r6,r7}
ldmia r3!, {r8,r9,r10,r11}
sbcs r8, r4,r8
sbcs r9, r5,r9
sbcs r10,r6,r10
sbcs r11,r7,r11
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_sub
ldr r10, [r1] @ load top-most bit
mov r11,sp
veor q0,q0,q0
sub r11,r2,r11 @ this is num*4
veor q1,q1,q1
mov r1,sp
sub r0,r0,r11 @ rewind r0
mov r3,r2 @ second 3/4th of frame
sbcs r10,r10,#0 @ result is carry flag
.LNEON_copy_n_zap:
ldmia r1!, {r4,r5,r6,r7}
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r3,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
ldmia r1, {r4,r5,r6,r7}
stmia r0!, {r8,r9,r10,r11}
sub r1,r1,#16
ldmia r0, {r8,r9,r10,r11}
it cc
movcc r8, r4
vst1.64 {q0,q1}, [r1,:256]! @ wipe
itt cc
movcc r9, r5
movcc r10,r6
vst1.64 {q0,q1}, [r3,:256]! @ wipe
it cc
movcc r11,r7
teq r1,r2 @ preserves carry
stmia r0!, {r8,r9,r10,r11}
bne .LNEON_copy_n_zap
mov sp,ip
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11}
bx lr @ bx lr
.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
#endif
.byte 77,111,110,116,103,111,109,101,114,121,32,109,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
.hidden OPENSSL_armcap_P
#endif
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__arm__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -0,0 +1,316 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include "internal.h"
int BN_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
const BIGNUM *tmp;
int a_neg = a->neg, ret;
// a + b a+b
// a + -b a-b
// -a + b b-a
// -a + -b -(a+b)
if (a_neg ^ b->neg) {
// only one is negative
if (a_neg) {
tmp = a;
a = b;
b = tmp;
}
// we are now a - b
if (BN_ucmp(a, b) < 0) {
if (!BN_usub(r, b, a)) {
return 0;
}
r->neg = 1;
} else {
if (!BN_usub(r, a, b)) {
return 0;
}
r->neg = 0;
}
return 1;
}
ret = BN_uadd(r, a, b);
r->neg = a_neg;
return ret;
}
int bn_uadd_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
// Widths are public, so we normalize to make |a| the larger one.
if (a->width < b->width) {
const BIGNUM *tmp = a;
a = b;
b = tmp;
}
int max = a->width;
int min = b->width;
if (!bn_wexpand(r, max + 1)) {
return 0;
}
r->width = max + 1;
BN_ULONG carry = bn_add_words(r->d, a->d, b->d, min);
for (int i = min; i < max; i++) {
// |r| and |a| may alias, so use a temporary.
BN_ULONG tmp = carry + a->d[i];
carry = tmp < a->d[i];
r->d[i] = tmp;
}
r->d[max] = carry;
return 1;
}
int BN_uadd(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
if (!bn_uadd_consttime(r, a, b)) {
return 0;
}
bn_set_minimal_width(r);
return 1;
}
int BN_add_word(BIGNUM *a, BN_ULONG w) {
BN_ULONG l;
int i;
// degenerate case: w is zero
if (!w) {
return 1;
}
// degenerate case: a is zero
if (BN_is_zero(a)) {
return BN_set_word(a, w);
}
// handle 'a' when negative
if (a->neg) {
a->neg = 0;
i = BN_sub_word(a, w);
if (!BN_is_zero(a)) {
a->neg = !(a->neg);
}
return i;
}
for (i = 0; w != 0 && i < a->width; i++) {
a->d[i] = l = a->d[i] + w;
w = (w > l) ? 1 : 0;
}
if (w && i == a->width) {
if (!bn_wexpand(a, a->width + 1)) {
return 0;
}
a->width++;
a->d[i] = w;
}
return 1;
}
int BN_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
int add = 0, neg = 0;
const BIGNUM *tmp;
// a - b a-b
// a - -b a+b
// -a - b -(a+b)
// -a - -b b-a
if (a->neg) {
if (b->neg) {
tmp = a;
a = b;
b = tmp;
} else {
add = 1;
neg = 1;
}
} else {
if (b->neg) {
add = 1;
neg = 0;
}
}
if (add) {
if (!BN_uadd(r, a, b)) {
return 0;
}
r->neg = neg;
return 1;
}
if (BN_ucmp(a, b) < 0) {
if (!BN_usub(r, b, a)) {
return 0;
}
r->neg = 1;
} else {
if (!BN_usub(r, a, b)) {
return 0;
}
r->neg = 0;
}
return 1;
}
int bn_usub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
// |b| may have more words than |a| given non-minimal inputs, but all words
// beyond |a->width| must then be zero.
int b_width = b->width;
if (b_width > a->width) {
if (!bn_fits_in_words(b, a->width)) {
OPENSSL_PUT_ERROR(BN, BN_R_ARG2_LT_ARG3);
return 0;
}
b_width = a->width;
}
if (!bn_wexpand(r, a->width)) {
return 0;
}
BN_ULONG borrow = bn_sub_words(r->d, a->d, b->d, b_width);
for (int i = b_width; i < a->width; i++) {
// |r| and |a| may alias, so use a temporary.
BN_ULONG tmp = a->d[i];
r->d[i] = a->d[i] - borrow;
borrow = tmp < r->d[i];
}
if (borrow) {
OPENSSL_PUT_ERROR(BN, BN_R_ARG2_LT_ARG3);
return 0;
}
r->width = a->width;
r->neg = 0;
return 1;
}
int BN_usub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b) {
if (!bn_usub_consttime(r, a, b)) {
return 0;
}
bn_set_minimal_width(r);
return 1;
}
int BN_sub_word(BIGNUM *a, BN_ULONG w) {
int i;
// degenerate case: w is zero
if (!w) {
return 1;
}
// degenerate case: a is zero
if (BN_is_zero(a)) {
i = BN_set_word(a, w);
if (i != 0) {
BN_set_negative(a, 1);
}
return i;
}
// handle 'a' when negative
if (a->neg) {
a->neg = 0;
i = BN_add_word(a, w);
a->neg = 1;
return i;
}
if ((bn_minimal_width(a) == 1) && (a->d[0] < w)) {
a->d[0] = w - a->d[0];
a->neg = 1;
return 1;
}
i = 0;
for (;;) {
if (a->d[i] >= w) {
a->d[i] -= w;
break;
} else {
a->d[i] -= w;
i++;
w = 1;
}
}
if ((a->d[i] == 0) && (i == (a->width - 1))) {
a->width--;
}
return 1;
}
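The vendored add.c above reduces signed BN_add/BN_sub to the unsigned helpers BN_uadd/BN_usub plus sign bookkeeping. As a rough, hypothetical sketch (not part of this commit), Swift code could call the same routine through the prefixed symbols, assuming the BORINGSSL_PREFIX define shown earlier renames BN_add to CBigNumBoringSSL_BN_add and that BN_new, BN_free, and BN_set_word are exported under the same prefix:

import CBigNumBoringSSL

// Hypothetical usage sketch; symbol names assume the CBigNumBoringSSL_ prefix
// applied by BORINGSSL_PREFIX in this vendored copy.
func demoAdd() -> Bool {
    // BN_new may return nil on allocation failure.
    guard let a = CBigNumBoringSSL_BN_new(),
          let b = CBigNumBoringSSL_BN_new(),
          let r = CBigNumBoringSSL_BN_new() else { return false }
    defer {
        CBigNumBoringSSL_BN_free(a)
        CBigNumBoringSSL_BN_free(b)
        CBigNumBoringSSL_BN_free(r)
    }
    CBigNumBoringSSL_BN_set_word(a, 40)  // a = 40
    CBigNumBoringSSL_BN_set_word(b, 2)   // b = 2
    // BN_add writes a + b into r and returns 1 on success, 0 on failure.
    return CBigNumBoringSSL_BN_add(r, a, b) == 1
}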

View File

@@ -0,0 +1,541 @@
/* x86_64 BIGNUM accelerator version 0.1, December 2002.
*
* Implemented by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
* project.
*
* Rights for redistribution and usage in source and binary forms are
* granted according to the OpenSSL license. Warranty of any kind is
* disclaimed.
*
* Q. Version 0.1? It doesn't sound like Andy, he used to assign real
* versions, like 1.0...
* A. Well, that's because this code is basically a quick-n-dirty
* proof-of-concept hack. As you can see it's implemented with
* inline assembler, which means that you're bound to GCC and that
* there might be enough room for further improvement.
*
* Q. Why inline assembler?
* A. x86_64 features its own ABI, which I'm not familiar with. This is
* why I decided to let the compiler take care of the subroutine
* prologue/epilogue as well as register allocation. For reference,
* Win64 implements a different ABI for AMD64 than Linux does.
*
* Q. How much faster does it get?
* A. 'apps/openssl speed rsa dsa' output with no-asm:
*
* sign verify sign/s verify/s
* rsa 512 bits 0.0006s 0.0001s 1683.8 18456.2
* rsa 1024 bits 0.0028s 0.0002s 356.0 6407.0
* rsa 2048 bits 0.0172s 0.0005s 58.0 1957.8
* rsa 4096 bits 0.1155s 0.0018s 8.7 555.6
* sign verify sign/s verify/s
* dsa 512 bits 0.0005s 0.0006s 2100.8 1768.3
* dsa 1024 bits 0.0014s 0.0018s 692.3 559.2
* dsa 2048 bits 0.0049s 0.0061s 204.7 165.0
*
* 'apps/openssl speed rsa dsa' output with this module:
*
* sign verify sign/s verify/s
* rsa 512 bits 0.0004s 0.0000s 2767.1 33297.9
* rsa 1024 bits 0.0012s 0.0001s 867.4 14674.7
* rsa 2048 bits 0.0061s 0.0002s 164.0 5270.0
* rsa 4096 bits 0.0384s 0.0006s 26.1 1650.8
* sign verify sign/s verify/s
* dsa 512 bits 0.0002s 0.0003s 4442.2 3786.3
* dsa 1024 bits 0.0005s 0.0007s 1835.1 1497.4
* dsa 2048 bits 0.0016s 0.0020s 620.4 504.6
*
* For the reference. IA-32 assembler implementation performs
* very much like 64-bit code compiled with no-asm on the same
* machine.
*/
#include <CBigNumBoringSSL_bn.h>
// TODO(davidben): Get this file working on MSVC x64.
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) && \
(defined(__GNUC__) || defined(__clang__))
#include "../internal.h"
#undef mul
#undef mul_add
// "m"(a), "+m"(r) is the way to favor DirectPath µ-code;
// "g"(0) let the compiler to decide where does it
// want to keep the value of zero;
#define mul_add(r, a, word, carry) \
do { \
register BN_ULONG high, low; \
__asm__("mulq %3" : "=a"(low), "=d"(high) : "a"(word), "m"(a) : "cc"); \
__asm__("addq %2,%0; adcq %3,%1" \
: "+r"(carry), "+d"(high) \
: "a"(low), "g"(0) \
: "cc"); \
__asm__("addq %2,%0; adcq %3,%1" \
: "+m"(r), "+d"(high) \
: "r"(carry), "g"(0) \
: "cc"); \
(carry) = high; \
} while (0)
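// Editorial sketch, not part of the vendored source: what the mul_add macro
// above computes, written portably. Assumes a 64-bit BN_ULONG and a compiler
// that provides unsigned __int128.
#if 0  // illustrative only
static inline void mul_add_portable(BN_ULONG *r, BN_ULONG a, BN_ULONG word,
                                    BN_ULONG *carry) {
  unsigned __int128 t = (unsigned __int128)a * word + *r + *carry;
  *r = (BN_ULONG)t;              // low 64 bits go back into the result word
  *carry = (BN_ULONG)(t >> 64);  // high 64 bits become the carry into the next word
}
#endif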
#define mul(r, a, word, carry) \
do { \
register BN_ULONG high, low; \
__asm__("mulq %3" : "=a"(low), "=d"(high) : "a"(word), "g"(a) : "cc"); \
__asm__("addq %2,%0; adcq %3,%1" \
: "+r"(carry), "+d"(high) \
: "a"(low), "g"(0) \
: "cc"); \
(r) = (carry); \
(carry) = high; \
} while (0)
#undef sqr
#define sqr(r0, r1, a) __asm__("mulq %2" : "=a"(r0), "=d"(r1) : "a"(a) : "cc");
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
BN_ULONG w) {
BN_ULONG c1 = 0;
if (num == 0) {
return (c1);
}
while (num & ~3) {
mul_add(rp[0], ap[0], w, c1);
mul_add(rp[1], ap[1], w, c1);
mul_add(rp[2], ap[2], w, c1);
mul_add(rp[3], ap[3], w, c1);
ap += 4;
rp += 4;
num -= 4;
}
if (num) {
mul_add(rp[0], ap[0], w, c1);
if (--num == 0) {
return c1;
}
mul_add(rp[1], ap[1], w, c1);
if (--num == 0) {
return c1;
}
mul_add(rp[2], ap[2], w, c1);
return c1;
}
return c1;
}
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
BN_ULONG w) {
BN_ULONG c1 = 0;
if (num == 0) {
return c1;
}
while (num & ~3) {
mul(rp[0], ap[0], w, c1);
mul(rp[1], ap[1], w, c1);
mul(rp[2], ap[2], w, c1);
mul(rp[3], ap[3], w, c1);
ap += 4;
rp += 4;
num -= 4;
}
if (num) {
mul(rp[0], ap[0], w, c1);
if (--num == 0) {
return c1;
}
mul(rp[1], ap[1], w, c1);
if (--num == 0) {
return c1;
}
mul(rp[2], ap[2], w, c1);
}
return c1;
}
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, size_t n) {
if (n == 0) {
return;
}
while (n & ~3) {
sqr(r[0], r[1], a[0]);
sqr(r[2], r[3], a[1]);
sqr(r[4], r[5], a[2]);
sqr(r[6], r[7], a[3]);
a += 4;
r += 8;
n -= 4;
}
if (n) {
sqr(r[0], r[1], a[0]);
if (--n == 0) {
return;
}
sqr(r[2], r[3], a[1]);
if (--n == 0) {
return;
}
sqr(r[4], r[5], a[2]);
}
}
BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
size_t n) {
BN_ULONG ret;
size_t i = 0;
if (n == 0) {
return 0;
}
__asm__ volatile (
" subq %0,%0 \n" // clear carry
" jmp 1f \n"
".p2align 4 \n"
"1:"
" movq (%4,%2,8),%0 \n"
" adcq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n"
" lea 1(%2),%2 \n"
" dec %1 \n"
" jnz 1b \n"
" sbbq %0,%0 \n"
: "=&r"(ret), "+c"(n), "+r"(i)
: "r"(rp), "r"(ap), "r"(bp)
: "cc", "memory");
return ret & 1;
}
BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
size_t n) {
BN_ULONG ret;
size_t i = 0;
if (n == 0) {
return 0;
}
__asm__ volatile (
" subq %0,%0 \n" // clear borrow
" jmp 1f \n"
".p2align 4 \n"
"1:"
" movq (%4,%2,8),%0 \n"
" sbbq (%5,%2,8),%0 \n"
" movq %0,(%3,%2,8) \n"
" lea 1(%2),%2 \n"
" dec %1 \n"
" jnz 1b \n"
" sbbq %0,%0 \n"
: "=&r"(ret), "+c"(n), "+r"(i)
: "r"(rp), "r"(ap), "r"(bp)
: "cc", "memory");
return ret & 1;
}
// mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0)
// mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0)
// sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0)
// sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)
// Keep in mind that the carry into the high part of the multiplication result
// cannot overflow, because that high part can never be all ones.
#define mul_add_c(a, b, c0, c1, c2) \
do { \
BN_ULONG t1, t2; \
__asm__("mulq %3" : "=a"(t1), "=d"(t2) : "a"(a), "m"(b) : "cc"); \
__asm__("addq %3,%0; adcq %4,%1; adcq %5,%2" \
: "+r"(c0), "+r"(c1), "+r"(c2) \
: "r"(t1), "r"(t2), "g"(0) \
: "cc"); \
} while (0)
#define sqr_add_c(a, i, c0, c1, c2) \
do { \
BN_ULONG t1, t2; \
__asm__("mulq %2" : "=a"(t1), "=d"(t2) : "a"((a)[i]) : "cc"); \
__asm__("addq %3,%0; adcq %4,%1; adcq %5,%2" \
: "+r"(c0), "+r"(c1), "+r"(c2) \
: "r"(t1), "r"(t2), "g"(0) \
: "cc"); \
} while (0)
#define mul_add_c2(a, b, c0, c1, c2) \
do { \
BN_ULONG t1, t2; \
__asm__("mulq %3" : "=a"(t1), "=d"(t2) : "a"(a), "m"(b) : "cc"); \
__asm__("addq %3,%0; adcq %4,%1; adcq %5,%2" \
: "+r"(c0), "+r"(c1), "+r"(c2) \
: "r"(t1), "r"(t2), "g"(0) \
: "cc"); \
__asm__("addq %3,%0; adcq %4,%1; adcq %5,%2" \
: "+r"(c0), "+r"(c1), "+r"(c2) \
: "r"(t1), "r"(t2), "g"(0) \
: "cc"); \
} while (0)
#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
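// Editorial sketch, not part of the vendored source: the three-word
// accumulator maintained by mul_add_c, with the no-overflow argument from the
// comment above spelled out. With 64-bit words, a*b <= (2^64 - 1)^2 =
// 2^128 - 2^65 + 1, so the high word of the product is at most 2^64 - 2 and
// absorbing one more carry cannot wrap it. Assumes unsigned __int128 support.
#if 0  // illustrative only
static inline void mul_add_c_portable(BN_ULONG a, BN_ULONG b, BN_ULONG *c0,
                                      BN_ULONG *c1, BN_ULONG *c2) {
  unsigned __int128 t = (unsigned __int128)a * b;
  BN_ULONG lo = (BN_ULONG)t;
  BN_ULONG hi = (BN_ULONG)(t >> 64);
  *c0 += lo;
  hi += (*c0 < lo);    // carry out of c0; cannot overflow hi (see above)
  *c1 += hi;
  *c2 += (*c1 < hi);   // carry out of c1 propagates into c2
}
#endif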
void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
mul_add_c(a[0], b[0], c1, c2, c3);
r[0] = c1;
c1 = 0;
mul_add_c(a[0], b[1], c2, c3, c1);
mul_add_c(a[1], b[0], c2, c3, c1);
r[1] = c2;
c2 = 0;
mul_add_c(a[2], b[0], c3, c1, c2);
mul_add_c(a[1], b[1], c3, c1, c2);
mul_add_c(a[0], b[2], c3, c1, c2);
r[2] = c3;
c3 = 0;
mul_add_c(a[0], b[3], c1, c2, c3);
mul_add_c(a[1], b[2], c1, c2, c3);
mul_add_c(a[2], b[1], c1, c2, c3);
mul_add_c(a[3], b[0], c1, c2, c3);
r[3] = c1;
c1 = 0;
mul_add_c(a[4], b[0], c2, c3, c1);
mul_add_c(a[3], b[1], c2, c3, c1);
mul_add_c(a[2], b[2], c2, c3, c1);
mul_add_c(a[1], b[3], c2, c3, c1);
mul_add_c(a[0], b[4], c2, c3, c1);
r[4] = c2;
c2 = 0;
mul_add_c(a[0], b[5], c3, c1, c2);
mul_add_c(a[1], b[4], c3, c1, c2);
mul_add_c(a[2], b[3], c3, c1, c2);
mul_add_c(a[3], b[2], c3, c1, c2);
mul_add_c(a[4], b[1], c3, c1, c2);
mul_add_c(a[5], b[0], c3, c1, c2);
r[5] = c3;
c3 = 0;
mul_add_c(a[6], b[0], c1, c2, c3);
mul_add_c(a[5], b[1], c1, c2, c3);
mul_add_c(a[4], b[2], c1, c2, c3);
mul_add_c(a[3], b[3], c1, c2, c3);
mul_add_c(a[2], b[4], c1, c2, c3);
mul_add_c(a[1], b[5], c1, c2, c3);
mul_add_c(a[0], b[6], c1, c2, c3);
r[6] = c1;
c1 = 0;
mul_add_c(a[0], b[7], c2, c3, c1);
mul_add_c(a[1], b[6], c2, c3, c1);
mul_add_c(a[2], b[5], c2, c3, c1);
mul_add_c(a[3], b[4], c2, c3, c1);
mul_add_c(a[4], b[3], c2, c3, c1);
mul_add_c(a[5], b[2], c2, c3, c1);
mul_add_c(a[6], b[1], c2, c3, c1);
mul_add_c(a[7], b[0], c2, c3, c1);
r[7] = c2;
c2 = 0;
mul_add_c(a[7], b[1], c3, c1, c2);
mul_add_c(a[6], b[2], c3, c1, c2);
mul_add_c(a[5], b[3], c3, c1, c2);
mul_add_c(a[4], b[4], c3, c1, c2);
mul_add_c(a[3], b[5], c3, c1, c2);
mul_add_c(a[2], b[6], c3, c1, c2);
mul_add_c(a[1], b[7], c3, c1, c2);
r[8] = c3;
c3 = 0;
mul_add_c(a[2], b[7], c1, c2, c3);
mul_add_c(a[3], b[6], c1, c2, c3);
mul_add_c(a[4], b[5], c1, c2, c3);
mul_add_c(a[5], b[4], c1, c2, c3);
mul_add_c(a[6], b[3], c1, c2, c3);
mul_add_c(a[7], b[2], c1, c2, c3);
r[9] = c1;
c1 = 0;
mul_add_c(a[7], b[3], c2, c3, c1);
mul_add_c(a[6], b[4], c2, c3, c1);
mul_add_c(a[5], b[5], c2, c3, c1);
mul_add_c(a[4], b[6], c2, c3, c1);
mul_add_c(a[3], b[7], c2, c3, c1);
r[10] = c2;
c2 = 0;
mul_add_c(a[4], b[7], c3, c1, c2);
mul_add_c(a[5], b[6], c3, c1, c2);
mul_add_c(a[6], b[5], c3, c1, c2);
mul_add_c(a[7], b[4], c3, c1, c2);
r[11] = c3;
c3 = 0;
mul_add_c(a[7], b[5], c1, c2, c3);
mul_add_c(a[6], b[6], c1, c2, c3);
mul_add_c(a[5], b[7], c1, c2, c3);
r[12] = c1;
c1 = 0;
mul_add_c(a[6], b[7], c2, c3, c1);
mul_add_c(a[7], b[6], c2, c3, c1);
r[13] = c2;
c2 = 0;
mul_add_c(a[7], b[7], c3, c1, c2);
r[14] = c3;
r[15] = c1;
}
void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
mul_add_c(a[0], b[0], c1, c2, c3);
r[0] = c1;
c1 = 0;
mul_add_c(a[0], b[1], c2, c3, c1);
mul_add_c(a[1], b[0], c2, c3, c1);
r[1] = c2;
c2 = 0;
mul_add_c(a[2], b[0], c3, c1, c2);
mul_add_c(a[1], b[1], c3, c1, c2);
mul_add_c(a[0], b[2], c3, c1, c2);
r[2] = c3;
c3 = 0;
mul_add_c(a[0], b[3], c1, c2, c3);
mul_add_c(a[1], b[2], c1, c2, c3);
mul_add_c(a[2], b[1], c1, c2, c3);
mul_add_c(a[3], b[0], c1, c2, c3);
r[3] = c1;
c1 = 0;
mul_add_c(a[3], b[1], c2, c3, c1);
mul_add_c(a[2], b[2], c2, c3, c1);
mul_add_c(a[1], b[3], c2, c3, c1);
r[4] = c2;
c2 = 0;
mul_add_c(a[2], b[3], c3, c1, c2);
mul_add_c(a[3], b[2], c3, c1, c2);
r[5] = c3;
c3 = 0;
mul_add_c(a[3], b[3], c1, c2, c3);
r[6] = c1;
r[7] = c2;
}
void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
sqr_add_c(a, 0, c1, c2, c3);
r[0] = c1;
c1 = 0;
sqr_add_c2(a, 1, 0, c2, c3, c1);
r[1] = c2;
c2 = 0;
sqr_add_c(a, 1, c3, c1, c2);
sqr_add_c2(a, 2, 0, c3, c1, c2);
r[2] = c3;
c3 = 0;
sqr_add_c2(a, 3, 0, c1, c2, c3);
sqr_add_c2(a, 2, 1, c1, c2, c3);
r[3] = c1;
c1 = 0;
sqr_add_c(a, 2, c2, c3, c1);
sqr_add_c2(a, 3, 1, c2, c3, c1);
sqr_add_c2(a, 4, 0, c2, c3, c1);
r[4] = c2;
c2 = 0;
sqr_add_c2(a, 5, 0, c3, c1, c2);
sqr_add_c2(a, 4, 1, c3, c1, c2);
sqr_add_c2(a, 3, 2, c3, c1, c2);
r[5] = c3;
c3 = 0;
sqr_add_c(a, 3, c1, c2, c3);
sqr_add_c2(a, 4, 2, c1, c2, c3);
sqr_add_c2(a, 5, 1, c1, c2, c3);
sqr_add_c2(a, 6, 0, c1, c2, c3);
r[6] = c1;
c1 = 0;
sqr_add_c2(a, 7, 0, c2, c3, c1);
sqr_add_c2(a, 6, 1, c2, c3, c1);
sqr_add_c2(a, 5, 2, c2, c3, c1);
sqr_add_c2(a, 4, 3, c2, c3, c1);
r[7] = c2;
c2 = 0;
sqr_add_c(a, 4, c3, c1, c2);
sqr_add_c2(a, 5, 3, c3, c1, c2);
sqr_add_c2(a, 6, 2, c3, c1, c2);
sqr_add_c2(a, 7, 1, c3, c1, c2);
r[8] = c3;
c3 = 0;
sqr_add_c2(a, 7, 2, c1, c2, c3);
sqr_add_c2(a, 6, 3, c1, c2, c3);
sqr_add_c2(a, 5, 4, c1, c2, c3);
r[9] = c1;
c1 = 0;
sqr_add_c(a, 5, c2, c3, c1);
sqr_add_c2(a, 6, 4, c2, c3, c1);
sqr_add_c2(a, 7, 3, c2, c3, c1);
r[10] = c2;
c2 = 0;
sqr_add_c2(a, 7, 4, c3, c1, c2);
sqr_add_c2(a, 6, 5, c3, c1, c2);
r[11] = c3;
c3 = 0;
sqr_add_c(a, 6, c1, c2, c3);
sqr_add_c2(a, 7, 5, c1, c2, c3);
r[12] = c1;
c1 = 0;
sqr_add_c2(a, 7, 6, c2, c3, c1);
r[13] = c2;
c2 = 0;
sqr_add_c(a, 7, c3, c1, c2);
r[14] = c3;
r[15] = c1;
}
void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
sqr_add_c(a, 0, c1, c2, c3);
r[0] = c1;
c1 = 0;
sqr_add_c2(a, 1, 0, c2, c3, c1);
r[1] = c2;
c2 = 0;
sqr_add_c(a, 1, c3, c1, c2);
sqr_add_c2(a, 2, 0, c3, c1, c2);
r[2] = c3;
c3 = 0;
sqr_add_c2(a, 3, 0, c1, c2, c3);
sqr_add_c2(a, 2, 1, c1, c2, c3);
r[3] = c1;
c1 = 0;
sqr_add_c(a, 2, c2, c3, c1);
sqr_add_c2(a, 3, 1, c2, c3, c1);
r[4] = c2;
c2 = 0;
sqr_add_c2(a, 3, 2, c3, c1, c2);
r[5] = c3;
c3 = 0;
sqr_add_c(a, 3, c1, c2, c3);
r[6] = c1;
r[7] = c2;
}
#undef mul_add
#undef mul
#undef sqr
#undef mul_add_c
#undef sqr_add_c
#undef mul_add_c2
#undef sqr_add_c2
#endif // !NO_ASM && X86_64 && (__GNUC__ || __clang__)

View File

@ -0,0 +1,445 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <limits.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include "internal.h"
#include "../delocate.h"
BIGNUM *BN_new(void) {
BIGNUM *bn = OPENSSL_malloc(sizeof(BIGNUM));
if (bn == NULL) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
return NULL;
}
OPENSSL_memset(bn, 0, sizeof(BIGNUM));
bn->flags = BN_FLG_MALLOCED;
return bn;
}
void BN_init(BIGNUM *bn) {
OPENSSL_memset(bn, 0, sizeof(BIGNUM));
}
void BN_free(BIGNUM *bn) {
if (bn == NULL) {
return;
}
if ((bn->flags & BN_FLG_STATIC_DATA) == 0) {
OPENSSL_free(bn->d);
}
if (bn->flags & BN_FLG_MALLOCED) {
OPENSSL_free(bn);
} else {
bn->d = NULL;
}
}
void BN_clear_free(BIGNUM *bn) {
char should_free;
if (bn == NULL) {
return;
}
if (bn->d != NULL) {
if ((bn->flags & BN_FLG_STATIC_DATA) == 0) {
OPENSSL_free(bn->d);
} else {
OPENSSL_cleanse(bn->d, bn->dmax * sizeof(bn->d[0]));
}
}
should_free = (bn->flags & BN_FLG_MALLOCED) != 0;
if (should_free) {
OPENSSL_free(bn);
} else {
OPENSSL_cleanse(bn, sizeof(BIGNUM));
}
}
BIGNUM *BN_dup(const BIGNUM *src) {
BIGNUM *copy;
if (src == NULL) {
return NULL;
}
copy = BN_new();
if (copy == NULL) {
return NULL;
}
if (!BN_copy(copy, src)) {
BN_free(copy);
return NULL;
}
return copy;
}
BIGNUM *BN_copy(BIGNUM *dest, const BIGNUM *src) {
if (src == dest) {
return dest;
}
if (!bn_wexpand(dest, src->width)) {
return NULL;
}
OPENSSL_memcpy(dest->d, src->d, sizeof(src->d[0]) * src->width);
dest->width = src->width;
dest->neg = src->neg;
return dest;
}
void BN_clear(BIGNUM *bn) {
if (bn->d != NULL) {
OPENSSL_memset(bn->d, 0, bn->dmax * sizeof(bn->d[0]));
}
bn->width = 0;
bn->neg = 0;
}
DEFINE_METHOD_FUNCTION(BIGNUM, BN_value_one) {
static const BN_ULONG kOneLimbs[1] = { 1 };
out->d = (BN_ULONG*) kOneLimbs;
out->width = 1;
out->dmax = 1;
out->neg = 0;
out->flags = BN_FLG_STATIC_DATA;
}
// BN_num_bits_word returns the minimum number of bits needed to represent the
// value in |l|.
unsigned BN_num_bits_word(BN_ULONG l) {
// |BN_num_bits| is often called on RSA prime factors. These have public bit
// lengths, but all bits beyond the high bit are secret, so count bits in
// constant time.
BN_ULONG x, mask;
int bits = (l != 0);
#if BN_BITS2 > 32
// Look at the upper half of |x|. |x| is at most 64 bits long.
x = l >> 32;
// Set |mask| to all ones if |x| (the top 32 bits of |l|) is non-zero and to
// all zeros otherwise.
mask = 0u - x;
mask = (0u - (mask >> (BN_BITS2 - 1)));
// If |x| is non-zero, the lower half is included in the bit count in full,
// and we count the upper half. Otherwise, we count the lower half.
bits += 32 & mask;
l ^= (x ^ l) & mask; // |l| is |x| if |mask| and remains |l| otherwise.
#endif
// The remaining blocks are analogous iterations at lower powers of two.
x = l >> 16;
mask = 0u - x;
mask = (0u - (mask >> (BN_BITS2 - 1)));
bits += 16 & mask;
l ^= (x ^ l) & mask;
x = l >> 8;
mask = 0u - x;
mask = (0u - (mask >> (BN_BITS2 - 1)));
bits += 8 & mask;
l ^= (x ^ l) & mask;
x = l >> 4;
mask = 0u - x;
mask = (0u - (mask >> (BN_BITS2 - 1)));
bits += 4 & mask;
l ^= (x ^ l) & mask;
x = l >> 2;
mask = 0u - x;
mask = (0u - (mask >> (BN_BITS2 - 1)));
bits += 2 & mask;
l ^= (x ^ l) & mask;
x = l >> 1;
mask = 0u - x;
mask = (0u - (mask >> (BN_BITS2 - 1)));
bits += 1 & mask;
return bits;
}
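// Editorial example, not part of the vendored source: for l = 0x500
// (decimal 1280), only the ">> 8" and ">> 2" halving steps above fire,
// adding 8 and 2 to the initial (l != 0) bit for a total of 11 bits.
#if 0  // illustrative only; assumes <assert.h> is available
static void example_num_bits_word(void) {
  assert(BN_num_bits_word(0) == 0);
  assert(BN_num_bits_word(1) == 1);
  assert(BN_num_bits_word(0x500) == 11);  // 2^10 <= 0x500 < 2^11
}
#endif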
unsigned BN_num_bits(const BIGNUM *bn) {
const int width = bn_minimal_width(bn);
if (width == 0) {
return 0;
}
return (width - 1) * BN_BITS2 + BN_num_bits_word(bn->d[width - 1]);
}
unsigned BN_num_bytes(const BIGNUM *bn) {
return (BN_num_bits(bn) + 7) / 8;
}
void BN_zero(BIGNUM *bn) {
bn->width = bn->neg = 0;
}
int BN_one(BIGNUM *bn) {
return BN_set_word(bn, 1);
}
int BN_set_word(BIGNUM *bn, BN_ULONG value) {
if (value == 0) {
BN_zero(bn);
return 1;
}
if (!bn_wexpand(bn, 1)) {
return 0;
}
bn->neg = 0;
bn->d[0] = value;
bn->width = 1;
return 1;
}
int BN_set_u64(BIGNUM *bn, uint64_t value) {
#if BN_BITS2 == 64
return BN_set_word(bn, value);
#elif BN_BITS2 == 32
if (value <= BN_MASK2) {
return BN_set_word(bn, (BN_ULONG)value);
}
if (!bn_wexpand(bn, 2)) {
return 0;
}
bn->neg = 0;
bn->d[0] = (BN_ULONG)value;
bn->d[1] = (BN_ULONG)(value >> 32);
bn->width = 2;
return 1;
#else
#error "BN_BITS2 must be 32 or 64."
#endif
}
int bn_set_words(BIGNUM *bn, const BN_ULONG *words, size_t num) {
if (!bn_wexpand(bn, num)) {
return 0;
}
OPENSSL_memmove(bn->d, words, num * sizeof(BN_ULONG));
// |bn_wexpand| verified that |num| isn't too large.
bn->width = (int)num;
bn->neg = 0;
return 1;
}
int bn_fits_in_words(const BIGNUM *bn, size_t num) {
// All words beyond |num| must be zero.
BN_ULONG mask = 0;
for (size_t i = num; i < (size_t)bn->width; i++) {
mask |= bn->d[i];
}
return mask == 0;
}
int bn_copy_words(BN_ULONG *out, size_t num, const BIGNUM *bn) {
if (bn->neg) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
size_t width = (size_t)bn->width;
if (width > num) {
if (!bn_fits_in_words(bn, num)) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
return 0;
}
width = num;
}
OPENSSL_memset(out, 0, sizeof(BN_ULONG) * num);
OPENSSL_memcpy(out, bn->d, sizeof(BN_ULONG) * width);
return 1;
}
int BN_is_negative(const BIGNUM *bn) {
return bn->neg != 0;
}
void BN_set_negative(BIGNUM *bn, int sign) {
if (sign && !BN_is_zero(bn)) {
bn->neg = 1;
} else {
bn->neg = 0;
}
}
int bn_wexpand(BIGNUM *bn, size_t words) {
BN_ULONG *a;
if (words <= (size_t)bn->dmax) {
return 1;
}
if (words > (INT_MAX / (4 * BN_BITS2))) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
return 0;
}
if (bn->flags & BN_FLG_STATIC_DATA) {
OPENSSL_PUT_ERROR(BN, BN_R_EXPAND_ON_STATIC_BIGNUM_DATA);
return 0;
}
a = OPENSSL_malloc(sizeof(BN_ULONG) * words);
if (a == NULL) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
return 0;
}
OPENSSL_memcpy(a, bn->d, sizeof(BN_ULONG) * bn->width);
OPENSSL_free(bn->d);
bn->d = a;
bn->dmax = (int)words;
return 1;
}
int bn_expand(BIGNUM *bn, size_t bits) {
if (bits + BN_BITS2 - 1 < bits) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
return 0;
}
return bn_wexpand(bn, (bits+BN_BITS2-1)/BN_BITS2);
}
int bn_resize_words(BIGNUM *bn, size_t words) {
#if defined(OPENSSL_PPC64LE)
// This is a workaround for a miscompilation bug in Clang 7.0.1 on POWER.
// The unittests catch the miscompilation, if it occurs, and it manifests
// as a crash in |bn_fits_in_words|.
//
// The bug only triggers if building in FIPS mode and with -O3. Clang 8.0.1
// has the same bug but this workaround is not effective there---I've not
// been able to find a workaround for 8.0.1.
//
// At the time of writing (2019-08-08), Clang git does *not* have this bug
// and does not need this workaround. The current git version should go on to
// become Clang 10; once we can depend on that, this can be removed.
if (value_barrier_w((size_t)bn->width == words)) {
return 1;
}
#endif
if ((size_t)bn->width <= words) {
if (!bn_wexpand(bn, words)) {
return 0;
}
OPENSSL_memset(bn->d + bn->width, 0,
(words - bn->width) * sizeof(BN_ULONG));
bn->width = words;
return 1;
}
// All words beyond the new width must be zero.
if (!bn_fits_in_words(bn, words)) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
return 0;
}
bn->width = words;
return 1;
}
void bn_select_words(BN_ULONG *r, BN_ULONG mask, const BN_ULONG *a,
const BN_ULONG *b, size_t num) {
for (size_t i = 0; i < num; i++) {
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
r[i] = constant_time_select_w(mask, a[i], b[i]);
}
}
int bn_minimal_width(const BIGNUM *bn) {
int ret = bn->width;
while (ret > 0 && bn->d[ret - 1] == 0) {
ret--;
}
return ret;
}
void bn_set_minimal_width(BIGNUM *bn) {
bn->width = bn_minimal_width(bn);
if (bn->width == 0) {
bn->neg = 0;
}
}

View File

@ -0,0 +1,230 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include <limits.h>
#include "internal.h"
BIGNUM *BN_bin2bn(const uint8_t *in, size_t len, BIGNUM *ret) {
size_t num_words;
unsigned m;
BN_ULONG word = 0;
BIGNUM *bn = NULL;
if (ret == NULL) {
ret = bn = BN_new();
}
if (ret == NULL) {
return NULL;
}
if (len == 0) {
ret->width = 0;
return ret;
}
num_words = ((len - 1) / BN_BYTES) + 1;
m = (len - 1) % BN_BYTES;
if (!bn_wexpand(ret, num_words)) {
if (bn) {
BN_free(bn);
}
return NULL;
}
// |bn_wexpand| must check bounds on |num_words| to write it into
// |ret->dmax|.
assert(num_words <= INT_MAX);
ret->width = (int)num_words;
ret->neg = 0;
while (len--) {
word = (word << 8) | *(in++);
if (m-- == 0) {
ret->d[--num_words] = word;
word = 0;
m = BN_BYTES - 1;
}
}
return ret;
}
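// Editorial example, not part of the vendored source: how the big-endian
// byte loop above packs bytes into words on a 64-bit build (BN_BYTES == 8).
// A 9-byte input needs two words: the leading byte lands in d[1] and the
// remaining eight bytes fill d[0]. Error checks are omitted.
#if 0  // illustrative only
static void example_bin2bn(void) {
  static const uint8_t in[9] = {0x01, 0xAA, 0xAA, 0xAA, 0xAA,
                                0xAA, 0xAA, 0xAA, 0xAA};
  BIGNUM *bn = BN_bin2bn(in, sizeof(in), NULL);
  // bn->width == 2, bn->d[1] == 0x01, bn->d[0] == 0xAAAAAAAAAAAAAAAA
  BN_free(bn);
}
#endif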
BIGNUM *BN_le2bn(const uint8_t *in, size_t len, BIGNUM *ret) {
BIGNUM *bn = NULL;
if (ret == NULL) {
bn = BN_new();
ret = bn;
}
if (ret == NULL) {
return NULL;
}
if (len == 0) {
ret->width = 0;
ret->neg = 0;
return ret;
}
// Reserve enough space in |ret|.
size_t num_words = ((len - 1) / BN_BYTES) + 1;
if (!bn_wexpand(ret, num_words)) {
BN_free(bn);
return NULL;
}
ret->width = num_words;
// Make sure the top bytes will be zeroed.
ret->d[num_words - 1] = 0;
// We only support little-endian platforms, so we can simply memcpy the
// internal representation.
OPENSSL_memcpy(ret->d, in, len);
return ret;
}
size_t BN_bn2bin(const BIGNUM *in, uint8_t *out) {
size_t n, i;
BN_ULONG l;
n = i = BN_num_bytes(in);
while (i--) {
l = in->d[i / BN_BYTES];
*(out++) = (unsigned char)(l >> (8 * (i % BN_BYTES))) & 0xff;
}
return n;
}
static int fits_in_bytes(const uint8_t *bytes, size_t num_bytes, size_t len) {
uint8_t mask = 0;
for (size_t i = len; i < num_bytes; i++) {
mask |= bytes[i];
}
return mask == 0;
}
int BN_bn2le_padded(uint8_t *out, size_t len, const BIGNUM *in) {
const uint8_t *bytes = (const uint8_t *)in->d;
size_t num_bytes = in->width * BN_BYTES;
if (len < num_bytes) {
if (!fits_in_bytes(bytes, num_bytes, len)) {
return 0;
}
num_bytes = len;
}
// We only support little-endian platforms, so we can simply memcpy into the
// internal representation.
OPENSSL_memcpy(out, bytes, num_bytes);
// Pad out the rest of the buffer with zeroes.
OPENSSL_memset(out + num_bytes, 0, len - num_bytes);
return 1;
}
int BN_bn2bin_padded(uint8_t *out, size_t len, const BIGNUM *in) {
const uint8_t *bytes = (const uint8_t *)in->d;
size_t num_bytes = in->width * BN_BYTES;
if (len < num_bytes) {
if (!fits_in_bytes(bytes, num_bytes, len)) {
return 0;
}
num_bytes = len;
}
// We only support little-endian platforms, so we can simply write the buffer
// in reverse.
for (size_t i = 0; i < num_bytes; i++) {
out[len - i - 1] = bytes[i];
}
// Pad out the rest of the buffer with zeroes.
OPENSSL_memset(out, 0, len - num_bytes);
return 1;
}
BN_ULONG BN_get_word(const BIGNUM *bn) {
switch (bn_minimal_width(bn)) {
case 0:
return 0;
case 1:
return bn->d[0];
default:
return BN_MASK2;
}
}
int BN_get_u64(const BIGNUM *bn, uint64_t *out) {
switch (bn_minimal_width(bn)) {
case 0:
*out = 0;
return 1;
case 1:
*out = bn->d[0];
return 1;
#if defined(OPENSSL_32_BIT)
case 2:
*out = (uint64_t) bn->d[0] | (((uint64_t) bn->d[1]) << 32);
return 1;
#endif
default:
return 0;
}
}

View File

@ -0,0 +1,200 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_type_check.h>
#include "internal.h"
#include "../../internal.h"
static int bn_cmp_words_consttime(const BN_ULONG *a, size_t a_len,
const BN_ULONG *b, size_t b_len) {
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
int ret = 0;
// Process the common words in little-endian order.
size_t min = a_len < b_len ? a_len : b_len;
for (size_t i = 0; i < min; i++) {
crypto_word_t eq = constant_time_eq_w(a[i], b[i]);
crypto_word_t lt = constant_time_lt_w(a[i], b[i]);
ret =
constant_time_select_int(eq, ret, constant_time_select_int(lt, -1, 1));
}
// If |a| or |b| has non-zero words beyond |min|, they take precedence.
if (a_len < b_len) {
crypto_word_t mask = 0;
for (size_t i = a_len; i < b_len; i++) {
mask |= b[i];
}
ret = constant_time_select_int(constant_time_is_zero_w(mask), ret, -1);
} else if (b_len < a_len) {
crypto_word_t mask = 0;
for (size_t i = b_len; i < a_len; i++) {
mask |= a[i];
}
ret = constant_time_select_int(constant_time_is_zero_w(mask), ret, 1);
}
return ret;
}
int BN_ucmp(const BIGNUM *a, const BIGNUM *b) {
return bn_cmp_words_consttime(a->d, a->width, b->d, b->width);
}
int BN_cmp(const BIGNUM *a, const BIGNUM *b) {
if ((a == NULL) || (b == NULL)) {
if (a != NULL) {
return -1;
} else if (b != NULL) {
return 1;
} else {
return 0;
}
}
// We do not attempt to process the sign bit in constant time. Negative
// |BIGNUM|s should never occur in crypto, only calculators.
if (a->neg != b->neg) {
if (a->neg) {
return -1;
}
return 1;
}
int ret = BN_ucmp(a, b);
return a->neg ? -ret : ret;
}
int bn_less_than_words(const BN_ULONG *a, const BN_ULONG *b, size_t len) {
return bn_cmp_words_consttime(a, len, b, len) < 0;
}
int BN_abs_is_word(const BIGNUM *bn, BN_ULONG w) {
if (bn->width == 0) {
return w == 0;
}
BN_ULONG mask = bn->d[0] ^ w;
for (int i = 1; i < bn->width; i++) {
mask |= bn->d[i];
}
return mask == 0;
}
int BN_cmp_word(const BIGNUM *a, BN_ULONG b) {
BIGNUM b_bn;
BN_init(&b_bn);
b_bn.d = &b;
b_bn.width = b > 0;
b_bn.dmax = 1;
b_bn.flags = BN_FLG_STATIC_DATA;
return BN_cmp(a, &b_bn);
}
int BN_is_zero(const BIGNUM *bn) {
return bn_fits_in_words(bn, 0);
}
int BN_is_one(const BIGNUM *bn) {
return bn->neg == 0 && BN_abs_is_word(bn, 1);
}
int BN_is_word(const BIGNUM *bn, BN_ULONG w) {
return BN_abs_is_word(bn, w) && (w == 0 || bn->neg == 0);
}
int BN_is_odd(const BIGNUM *bn) {
return bn->width > 0 && (bn->d[0] & 1) == 1;
}
int BN_is_pow2(const BIGNUM *bn) {
int width = bn_minimal_width(bn);
if (width == 0 || bn->neg) {
return 0;
}
for (int i = 0; i < width - 1; i++) {
if (bn->d[i] != 0) {
return 0;
}
}
return 0 == (bn->d[width-1] & (bn->d[width-1] - 1));
}
int BN_equal_consttime(const BIGNUM *a, const BIGNUM *b) {
BN_ULONG mask = 0;
// If |a| or |b| has more words than the other, all those words must be zero.
for (int i = a->width; i < b->width; i++) {
mask |= b->d[i];
}
for (int i = b->width; i < a->width; i++) {
mask |= a->d[i];
}
// Common words must match.
int min = a->width < b->width ? a->width : b->width;
for (int i = 0; i < min; i++) {
mask |= (a->d[i] ^ b->d[i]);
}
// The sign bit must match.
mask |= (a->neg ^ b->neg);
return mask == 0;
}
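// Editorial sketch, not part of the vendored source: unlike BN_cmp, the
// function above never exits early, so its running time does not depend on
// where (or whether) the operands differ. That makes it the right choice
// when the compared values are secret.
#if 0  // illustrative only
static int secrets_match(const BIGNUM *expected, const BIGNUM *computed) {
  // Returns 1 on equality without revealing the first differing word through
  // timing.
  return BN_equal_consttime(expected, computed);
}
#endif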

View File

@ -0,0 +1,236 @@
/* Written by Ulf Moeller for the OpenSSL project. */
/* ====================================================================
* Copyright (c) 1998-2004 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include "../../internal.h"
// The stack frame array is resized on demand; this is its initial allocation size.
#define BN_CTX_START_FRAMES 32
// BN_STACK
// A |BN_STACK| is a stack of |size_t| values.
typedef struct {
// Array of indexes into |ctx->bignums|.
size_t *indexes;
// Number of stack frames, and the size of the allocated array
size_t depth, size;
} BN_STACK;
static void BN_STACK_init(BN_STACK *);
static void BN_STACK_cleanup(BN_STACK *);
static int BN_STACK_push(BN_STACK *, size_t idx);
static size_t BN_STACK_pop(BN_STACK *);
// BN_CTX
DEFINE_STACK_OF(BIGNUM)
// The opaque BN_CTX type
struct bignum_ctx {
// bignums is the stack of |BIGNUM|s managed by this |BN_CTX|.
STACK_OF(BIGNUM) *bignums;
// stack is the stack of |BN_CTX_start| frames. It is the value of |used| at
// the time |BN_CTX_start| was called.
BN_STACK stack;
// used is the number of |BIGNUM|s from |bignums| that have been used.
size_t used;
// error is one if any operation on this |BN_CTX| failed. All subsequent
// operations will fail.
char error;
// defer_error is one if an operation on this |BN_CTX| has failed, but no
// error has been pushed to the queue yet. This is used to defer errors from
// |BN_CTX_start| to |BN_CTX_get|.
char defer_error;
};
BN_CTX *BN_CTX_new(void) {
BN_CTX *ret = OPENSSL_malloc(sizeof(BN_CTX));
if (!ret) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
return NULL;
}
// Initialise the structure
ret->bignums = NULL;
BN_STACK_init(&ret->stack);
ret->used = 0;
ret->error = 0;
ret->defer_error = 0;
return ret;
}
void BN_CTX_free(BN_CTX *ctx) {
if (ctx == NULL) {
return;
}
// All |BN_CTX_start| calls must be matched with |BN_CTX_end|, otherwise the
// function may use more memory than expected, potentially without bound if
// done in a loop. Assert that all |BIGNUM|s have been released.
assert(ctx->used == 0 || ctx->error);
sk_BIGNUM_pop_free(ctx->bignums, BN_free);
BN_STACK_cleanup(&ctx->stack);
OPENSSL_free(ctx);
}
void BN_CTX_start(BN_CTX *ctx) {
if (ctx->error) {
// Once an operation has failed, |ctx->stack| no longer matches the number
// of |BN_CTX_end| calls to come. Do nothing.
return;
}
if (!BN_STACK_push(&ctx->stack, ctx->used)) {
ctx->error = 1;
// |BN_CTX_start| cannot fail, so defer the error to |BN_CTX_get|.
ctx->defer_error = 1;
}
}
BIGNUM *BN_CTX_get(BN_CTX *ctx) {
// Once any operation has failed, they all do.
if (ctx->error) {
if (ctx->defer_error) {
OPENSSL_PUT_ERROR(BN, BN_R_TOO_MANY_TEMPORARY_VARIABLES);
ctx->defer_error = 0;
}
return NULL;
}
if (ctx->bignums == NULL) {
ctx->bignums = sk_BIGNUM_new_null();
if (ctx->bignums == NULL) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
ctx->error = 1;
return NULL;
}
}
if (ctx->used == sk_BIGNUM_num(ctx->bignums)) {
BIGNUM *bn = BN_new();
if (bn == NULL || !sk_BIGNUM_push(ctx->bignums, bn)) {
OPENSSL_PUT_ERROR(BN, BN_R_TOO_MANY_TEMPORARY_VARIABLES);
BN_free(bn);
ctx->error = 1;
return NULL;
}
}
BIGNUM *ret = sk_BIGNUM_value(ctx->bignums, ctx->used);
BN_zero(ret);
// This is bounded by |sk_BIGNUM_num|, so it cannot overflow.
ctx->used++;
return ret;
}
void BN_CTX_end(BN_CTX *ctx) {
if (ctx->error) {
// Once an operation has failed, |ctx->stack| no longer matches the number
// of |BN_CTX_end| calls to come. Do nothing.
return;
}
ctx->used = BN_STACK_pop(&ctx->stack);
}
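// Editorial sketch, not part of the vendored source: the start/get/end
// discipline described in the comments above. Because the error state is
// sticky and later |BN_CTX_get| calls keep returning NULL, checking only the
// last one is sufficient.
#if 0  // illustrative only
static int example_with_ctx(BN_CTX *ctx) {
  int ok = 0;
  BN_CTX_start(ctx);
  BIGNUM *t1 = BN_CTX_get(ctx);
  BIGNUM *t2 = BN_CTX_get(ctx);
  if (t2 == NULL) {  // a NULL here also covers a failed |t1|
    goto done;
  }
  // ... use t1 and t2 as scratch values ...
  ok = 1;
done:
  BN_CTX_end(ctx);  // releases t1/t2 back to the pool; must pair with start
  return ok;
}
#endif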
// BN_STACK
static void BN_STACK_init(BN_STACK *st) {
st->indexes = NULL;
st->depth = st->size = 0;
}
static void BN_STACK_cleanup(BN_STACK *st) {
OPENSSL_free(st->indexes);
}
static int BN_STACK_push(BN_STACK *st, size_t idx) {
if (st->depth == st->size) {
// This function intentionally does not push to the error queue on error.
// Error-reporting is deferred to |BN_CTX_get|.
size_t new_size = st->size != 0 ? st->size * 3 / 2 : BN_CTX_START_FRAMES;
if (new_size <= st->size || new_size > ((size_t)-1) / sizeof(size_t)) {
return 0;
}
size_t *new_indexes =
OPENSSL_realloc(st->indexes, new_size * sizeof(size_t));
if (new_indexes == NULL) {
return 0;
}
st->indexes = new_indexes;
st->size = new_size;
}
st->indexes[st->depth] = idx;
st->depth++;
return 1;
}
static size_t BN_STACK_pop(BN_STACK *st) {
assert(st->depth > 0);
st->depth--;
return st->indexes[st->depth];
}

View File

@ -0,0 +1,886 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include <limits.h>
#include <CBigNumBoringSSL_err.h>
#include "internal.h"
#if !defined(BN_CAN_DIVIDE_ULLONG) && !defined(BN_CAN_USE_INLINE_ASM)
// bn_div_words divides a double-width |h|,|l| by |d| and returns the result,
// which must fit in a |BN_ULONG|.
static BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) {
BN_ULONG dh, dl, q, ret = 0, th, tl, t;
int i, count = 2;
if (d == 0) {
return BN_MASK2;
}
i = BN_num_bits_word(d);
assert((i == BN_BITS2) || (h <= (BN_ULONG)1 << i));
i = BN_BITS2 - i;
if (h >= d) {
h -= d;
}
if (i) {
d <<= i;
h = (h << i) | (l >> (BN_BITS2 - i));
l <<= i;
}
dh = (d & BN_MASK2h) >> BN_BITS4;
dl = (d & BN_MASK2l);
for (;;) {
if ((h >> BN_BITS4) == dh) {
q = BN_MASK2l;
} else {
q = h / dh;
}
th = q * dh;
tl = dl * q;
for (;;) {
t = h - th;
if ((t & BN_MASK2h) ||
((tl) <= ((t << BN_BITS4) | ((l & BN_MASK2h) >> BN_BITS4)))) {
break;
}
q--;
th -= dh;
tl -= dl;
}
t = (tl >> BN_BITS4);
tl = (tl << BN_BITS4) & BN_MASK2h;
th += t;
if (l < tl) {
th++;
}
l -= tl;
if (h < th) {
h += d;
q--;
}
h -= th;
if (--count == 0) {
break;
}
ret = q << BN_BITS4;
h = (h << BN_BITS4) | (l >> BN_BITS4);
l = (l & BN_MASK2l) << BN_BITS4;
}
ret |= q;
return ret;
}
#endif // !defined(BN_CAN_DIVIDE_ULLONG) && !defined(BN_CAN_USE_INLINE_ASM)
static inline void bn_div_rem_words(BN_ULONG *quotient_out, BN_ULONG *rem_out,
BN_ULONG n0, BN_ULONG n1, BN_ULONG d0) {
// GCC and Clang generate function calls to |__udivdi3| and |__umoddi3| when
// the |BN_ULLONG|-based C code is used.
//
// GCC bugs:
// * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=14224
// * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43721
// * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54183
// * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58897
// * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65668
//
// Clang bugs:
// * https://llvm.org/bugs/show_bug.cgi?id=6397
// * https://llvm.org/bugs/show_bug.cgi?id=12418
//
// These issues aren't specific to x86 and x86_64, so it might be worthwhile
// to add more assembly language implementations.
#if defined(BN_CAN_USE_INLINE_ASM) && defined(OPENSSL_X86)
__asm__ volatile("divl %4"
: "=a"(*quotient_out), "=d"(*rem_out)
: "a"(n1), "d"(n0), "rm"(d0)
: "cc");
#elif defined(BN_CAN_USE_INLINE_ASM) && defined(OPENSSL_X86_64)
__asm__ volatile("divq %4"
: "=a"(*quotient_out), "=d"(*rem_out)
: "a"(n1), "d"(n0), "rm"(d0)
: "cc");
#else
#if defined(BN_CAN_DIVIDE_ULLONG)
BN_ULLONG n = (((BN_ULLONG)n0) << BN_BITS2) | n1;
*quotient_out = (BN_ULONG)(n / d0);
#else
*quotient_out = bn_div_words(n0, n1, d0);
#endif
*rem_out = n1 - (*quotient_out * d0);
#endif
}
// BN_div computes "quotient := numerator / divisor", rounding towards zero,
// and sets up |rem| such that "quotient * divisor + rem = numerator" holds.
//
// Thus:
//
// quotient->neg == numerator->neg ^ divisor->neg
// (unless the result is zero)
// rem->neg == numerator->neg
// (unless the remainder is zero)
//
// If |quotient| or |rem| is NULL, the respective value is not returned.
//
// This was specifically designed to contain fewer branches that may leak
// sensitive information; see "New Branch Prediction Vulnerabilities in OpenSSL
// and Necessary Software Countermeasures" by Onur Acıçmez, Shay Gueron, and
// Jean-Pierre Seifert.
int BN_div(BIGNUM *quotient, BIGNUM *rem, const BIGNUM *numerator,
const BIGNUM *divisor, BN_CTX *ctx) {
int norm_shift, loop;
BIGNUM wnum;
BN_ULONG *resp, *wnump;
BN_ULONG d0, d1;
int num_n, div_n;
// This function relies on the historical minimal-width |BIGNUM| invariant.
// It is already not constant-time (constant-time reductions should use
// Montgomery logic), so we shrink all inputs and intermediate values to
// retain the previous behavior.
// Invalid zero-padding would have particularly bad consequences.
int numerator_width = bn_minimal_width(numerator);
int divisor_width = bn_minimal_width(divisor);
if ((numerator_width > 0 && numerator->d[numerator_width - 1] == 0) ||
(divisor_width > 0 && divisor->d[divisor_width - 1] == 0)) {
OPENSSL_PUT_ERROR(BN, BN_R_NOT_INITIALIZED);
return 0;
}
if (BN_is_zero(divisor)) {
OPENSSL_PUT_ERROR(BN, BN_R_DIV_BY_ZERO);
return 0;
}
BN_CTX_start(ctx);
BIGNUM *tmp = BN_CTX_get(ctx);
BIGNUM *snum = BN_CTX_get(ctx);
BIGNUM *sdiv = BN_CTX_get(ctx);
BIGNUM *res = NULL;
if (quotient == NULL) {
res = BN_CTX_get(ctx);
} else {
res = quotient;
}
if (sdiv == NULL || res == NULL) {
goto err;
}
// First we normalise the numbers
norm_shift = BN_BITS2 - (BN_num_bits(divisor) % BN_BITS2);
if (!BN_lshift(sdiv, divisor, norm_shift)) {
goto err;
}
bn_set_minimal_width(sdiv);
sdiv->neg = 0;
norm_shift += BN_BITS2;
if (!BN_lshift(snum, numerator, norm_shift)) {
goto err;
}
bn_set_minimal_width(snum);
snum->neg = 0;
// Since we don't want to have special-case logic for the case where snum is
// larger than sdiv, we pad snum with enough zeroes without changing its
// value.
if (snum->width <= sdiv->width + 1) {
if (!bn_wexpand(snum, sdiv->width + 2)) {
goto err;
}
for (int i = snum->width; i < sdiv->width + 2; i++) {
snum->d[i] = 0;
}
snum->width = sdiv->width + 2;
} else {
if (!bn_wexpand(snum, snum->width + 1)) {
goto err;
}
snum->d[snum->width] = 0;
snum->width++;
}
div_n = sdiv->width;
num_n = snum->width;
loop = num_n - div_n;
// Let's set up a 'window' into snum
// This is the part that corresponds to the current
// 'area' being divided
wnum.neg = 0;
wnum.d = &(snum->d[loop]);
wnum.width = div_n;
// only needed when BN_ucmp messes up the values between width and max
wnum.dmax = snum->dmax - loop; // so we don't step out of bounds
// Get the top 2 words of sdiv
// div_n=sdiv->width;
d0 = sdiv->d[div_n - 1];
d1 = (div_n == 1) ? 0 : sdiv->d[div_n - 2];
// pointer to the 'top' of snum
wnump = &(snum->d[num_n - 1]);
// Set up 'res'
res->neg = (numerator->neg ^ divisor->neg);
if (!bn_wexpand(res, loop + 1)) {
goto err;
}
res->width = loop - 1;
resp = &(res->d[loop - 1]);
// space for temp
if (!bn_wexpand(tmp, div_n + 1)) {
goto err;
}
// if res->width == 0 then clear the neg value otherwise decrease
// the resp pointer
if (res->width == 0) {
res->neg = 0;
} else {
resp--;
}
for (int i = 0; i < loop - 1; i++, wnump--, resp--) {
BN_ULONG q, l0;
// the first part of the loop uses the top two words of snum and sdiv to
// calculate a BN_ULONG q such that | wnum - sdiv * q | < sdiv
BN_ULONG n0, n1, rm = 0;
n0 = wnump[0];
n1 = wnump[-1];
if (n0 == d0) {
q = BN_MASK2;
} else {
// n0 < d0
bn_div_rem_words(&q, &rm, n0, n1, d0);
#ifdef BN_ULLONG
BN_ULLONG t2 = (BN_ULLONG)d1 * q;
for (;;) {
if (t2 <= ((((BN_ULLONG)rm) << BN_BITS2) | wnump[-2])) {
break;
}
q--;
rm += d0;
if (rm < d0) {
break; // don't let rm overflow
}
t2 -= d1;
}
#else // !BN_ULLONG
BN_ULONG t2l, t2h;
BN_UMULT_LOHI(t2l, t2h, d1, q);
for (;;) {
if (t2h < rm ||
(t2h == rm && t2l <= wnump[-2])) {
break;
}
q--;
rm += d0;
if (rm < d0) {
break; // don't let rm overflow
}
if (t2l < d1) {
t2h--;
}
t2l -= d1;
}
#endif // !BN_ULLONG
}
l0 = bn_mul_words(tmp->d, sdiv->d, div_n, q);
tmp->d[div_n] = l0;
wnum.d--;
// ignore the top values of the bignums; just subtract the two
// BN_ULONG arrays with bn_sub_words
if (bn_sub_words(wnum.d, wnum.d, tmp->d, div_n + 1)) {
// Note: As we have considered only the leading
// two BN_ULONGs in the calculation of q, sdiv * q
// might be greater than wnum (but then (q-1) * sdiv
// is less than or equal to wnum)
q--;
if (bn_add_words(wnum.d, wnum.d, sdiv->d, div_n)) {
// we can't have an overflow here (assuming
// that q != 0, but if q == 0 then tmp is
// zero anyway)
(*wnump)++;
}
}
// store part of the result
*resp = q;
}
bn_set_minimal_width(snum);
if (rem != NULL) {
// Keep a copy of the neg flag in numerator because if |rem| == |numerator|
// |BN_rshift| will overwrite it.
int neg = numerator->neg;
if (!BN_rshift(rem, snum, norm_shift)) {
goto err;
}
if (!BN_is_zero(rem)) {
rem->neg = neg;
}
}
bn_set_minimal_width(res);
BN_CTX_end(ctx);
return 1;
err:
BN_CTX_end(ctx);
return 0;
}
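// Editorial sketch, not part of the vendored source: the quotient/remainder
// contract documented above, on small values. 7 / -2 rounds towards zero, so
// the quotient is -3 and the remainder 1 (taking the numerator's sign), and
// quotient * divisor + remainder == numerator holds. Error checks omitted.
#if 0  // illustrative only
static void example_div(BN_CTX *ctx) {
  BIGNUM *num = BN_new(), *div = BN_new(), *q = BN_new(), *r = BN_new();
  BN_set_word(num, 7);
  BN_set_word(div, 2);
  BN_set_negative(div, 1);      // divisor = -2
  BN_div(q, r, num, div, ctx);  // q = -3, r = 1, and (-3)*(-2) + 1 == 7
  BN_free(num); BN_free(div); BN_free(q); BN_free(r);
}
#endif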
int BN_nnmod(BIGNUM *r, const BIGNUM *m, const BIGNUM *d, BN_CTX *ctx) {
if (!(BN_mod(r, m, d, ctx))) {
return 0;
}
if (!r->neg) {
return 1;
}
// now -|d| < r < 0, so we have to set r := r + |d|.
return (d->neg ? BN_sub : BN_add)(r, r, d);
}
BN_ULONG bn_reduce_once(BN_ULONG *r, const BN_ULONG *a, BN_ULONG carry,
const BN_ULONG *m, size_t num) {
assert(r != a);
// |r| = |a| - |m|. |bn_sub_words| performs the bulk of the subtraction, and
// then we apply the borrow to |carry|.
carry -= bn_sub_words(r, a, m, num);
// We know 0 <= |a| < 2*|m|, so -|m| <= |r| < |m|.
//
// If 0 <= |r| < |m|, |r| fits in |num| words and |carry| is zero. We then
// wish to select |r| as the answer. Otherwise -m <= r < 0 and we wish to
// return |r| + |m|, or |a|. |carry| must then be -1 or all ones. In both
// cases, |carry| is a suitable input to |bn_select_words|.
//
// Although |carry| may be one if it was one on input and |bn_sub_words|
// returns zero, this would give |r| > |m|, violating our input assumptions.
assert(carry == 0 || carry == (BN_ULONG)-1);
bn_select_words(r, carry, a /* r < 0 */, r /* r >= 0 */, num);
return carry;
}
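// Editorial worked example, not part of the vendored source, using one-word
// values and carry = 0 on entry. For a = 9, m = 7: the subtraction 9 - 7 = 2
// produces no borrow, carry stays 0, and the reduced value 2 is selected.
// For a = 3, m = 7: 3 - 7 borrows, carry becomes all ones, and
// bn_select_words puts the original a = 3 back into |r|.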
BN_ULONG bn_reduce_once_in_place(BN_ULONG *r, BN_ULONG carry, const BN_ULONG *m,
BN_ULONG *tmp, size_t num) {
// See |bn_reduce_once| for why this logic works.
carry -= bn_sub_words(tmp, r, m, num);
assert(carry == 0 || carry == (BN_ULONG)-1);
bn_select_words(r, carry, r /* tmp < 0 */, tmp /* tmp >= 0 */, num);
return carry;
}
void bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
const BN_ULONG *m, BN_ULONG *tmp, size_t num) {
// r = a - b
BN_ULONG borrow = bn_sub_words(r, a, b, num);
// tmp = a - b + m
bn_add_words(tmp, r, m, num);
bn_select_words(r, 0 - borrow, tmp /* r < 0 */, r /* r >= 0 */, num);
}
void bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
const BN_ULONG *m, BN_ULONG *tmp, size_t num) {
BN_ULONG carry = bn_add_words(r, a, b, num);
bn_reduce_once_in_place(r, carry, m, tmp, num);
}
int bn_div_consttime(BIGNUM *quotient, BIGNUM *remainder,
const BIGNUM *numerator, const BIGNUM *divisor,
BN_CTX *ctx) {
if (BN_is_negative(numerator) || BN_is_negative(divisor)) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
if (BN_is_zero(divisor)) {
OPENSSL_PUT_ERROR(BN, BN_R_DIV_BY_ZERO);
return 0;
}
// This function implements long division in binary. It is not very efficient,
// but it is simple, easy to make constant-time, and performant enough for RSA
// key generation.
int ret = 0;
BN_CTX_start(ctx);
BIGNUM *q = quotient, *r = remainder;
if (quotient == NULL || quotient == numerator || quotient == divisor) {
q = BN_CTX_get(ctx);
}
if (remainder == NULL || remainder == numerator || remainder == divisor) {
r = BN_CTX_get(ctx);
}
BIGNUM *tmp = BN_CTX_get(ctx);
if (q == NULL || r == NULL || tmp == NULL ||
!bn_wexpand(q, numerator->width) ||
!bn_wexpand(r, divisor->width) ||
!bn_wexpand(tmp, divisor->width)) {
goto err;
}
OPENSSL_memset(q->d, 0, numerator->width * sizeof(BN_ULONG));
q->width = numerator->width;
q->neg = 0;
OPENSSL_memset(r->d, 0, divisor->width * sizeof(BN_ULONG));
r->width = divisor->width;
r->neg = 0;
// Incorporate |numerator| into |r|, one bit at a time, reducing after each
// step. At the start of each loop iteration, |r| < |divisor|.
for (int i = numerator->width - 1; i >= 0; i--) {
for (int bit = BN_BITS2 - 1; bit >= 0; bit--) {
// Incorporate the next bit of the numerator, by computing
// r = 2*r or 2*r + 1. Note the result fits in one more word. We store the
// extra word in |carry|.
BN_ULONG carry = bn_add_words(r->d, r->d, r->d, divisor->width);
r->d[0] |= (numerator->d[i] >> bit) & 1;
// |r| was previously fully-reduced, so we know:
// 2*0 <= r <= 2*(divisor-1) + 1
// 0 <= r <= 2*divisor - 1 < 2*divisor.
// Thus |r| satisfies the preconditions for |bn_reduce_once_in_place|.
BN_ULONG subtracted = bn_reduce_once_in_place(r->d, carry, divisor->d,
tmp->d, divisor->width);
// The corresponding bit of the quotient is set iff we needed to subtract.
q->d[i] |= (~subtracted & 1) << bit;
}
}
if ((quotient != NULL && !BN_copy(quotient, q)) ||
(remainder != NULL && !BN_copy(remainder, r))) {
goto err;
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
static BIGNUM *bn_scratch_space_from_ctx(size_t width, BN_CTX *ctx) {
BIGNUM *ret = BN_CTX_get(ctx);
if (ret == NULL ||
!bn_wexpand(ret, width)) {
return NULL;
}
ret->neg = 0;
ret->width = width;
return ret;
}
// bn_resized_from_ctx returns |bn| with width at least |width| or NULL on
// error. This is so it may be used with low-level "words" functions. If
// necessary, it allocates a new |BIGNUM| with a lifetime of the current scope
// in |ctx|, so the caller does not need to explicitly free it. |bn| must fit in
// |width| words.
static const BIGNUM *bn_resized_from_ctx(const BIGNUM *bn, size_t width,
BN_CTX *ctx) {
if ((size_t)bn->width >= width) {
// Any excess words must be zero.
assert(bn_fits_in_words(bn, width));
return bn;
}
BIGNUM *ret = bn_scratch_space_from_ctx(width, ctx);
if (ret == NULL ||
!BN_copy(ret, bn) ||
!bn_resize_words(ret, width)) {
return NULL;
}
return ret;
}
int BN_mod_add(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
BN_CTX *ctx) {
if (!BN_add(r, a, b)) {
return 0;
}
return BN_nnmod(r, r, m, ctx);
}
int BN_mod_add_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BIGNUM *m) {
BN_CTX *ctx = BN_CTX_new();
int ok = ctx != NULL &&
bn_mod_add_consttime(r, a, b, m, ctx);
BN_CTX_free(ctx);
return ok;
}
int bn_mod_add_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BIGNUM *m, BN_CTX *ctx) {
BN_CTX_start(ctx);
a = bn_resized_from_ctx(a, m->width, ctx);
b = bn_resized_from_ctx(b, m->width, ctx);
BIGNUM *tmp = bn_scratch_space_from_ctx(m->width, ctx);
int ok = a != NULL && b != NULL && tmp != NULL &&
bn_wexpand(r, m->width);
if (ok) {
bn_mod_add_words(r->d, a->d, b->d, m->d, tmp->d, m->width);
r->width = m->width;
r->neg = 0;
}
BN_CTX_end(ctx);
return ok;
}
int BN_mod_sub(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
BN_CTX *ctx) {
if (!BN_sub(r, a, b)) {
return 0;
}
return BN_nnmod(r, r, m, ctx);
}
int bn_mod_sub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BIGNUM *m, BN_CTX *ctx) {
BN_CTX_start(ctx);
a = bn_resized_from_ctx(a, m->width, ctx);
b = bn_resized_from_ctx(b, m->width, ctx);
BIGNUM *tmp = bn_scratch_space_from_ctx(m->width, ctx);
int ok = a != NULL && b != NULL && tmp != NULL &&
bn_wexpand(r, m->width);
if (ok) {
bn_mod_sub_words(r->d, a->d, b->d, m->d, tmp->d, m->width);
r->width = m->width;
r->neg = 0;
}
BN_CTX_end(ctx);
return ok;
}
int BN_mod_sub_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BIGNUM *m) {
BN_CTX *ctx = BN_CTX_new();
int ok = ctx != NULL &&
bn_mod_sub_consttime(r, a, b, m, ctx);
BN_CTX_free(ctx);
return ok;
}
int BN_mod_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, const BIGNUM *m,
BN_CTX *ctx) {
BIGNUM *t;
int ret = 0;
BN_CTX_start(ctx);
t = BN_CTX_get(ctx);
if (t == NULL) {
goto err;
}
if (a == b) {
if (!BN_sqr(t, a, ctx)) {
goto err;
}
} else {
if (!BN_mul(t, a, b, ctx)) {
goto err;
}
}
if (!BN_nnmod(r, t, m, ctx)) {
goto err;
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
int BN_mod_sqr(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) {
if (!BN_sqr(r, a, ctx)) {
return 0;
}
// r->neg == 0, thus we don't need BN_nnmod
return BN_mod(r, r, m, ctx);
}
int BN_mod_lshift(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m,
BN_CTX *ctx) {
BIGNUM *abs_m = NULL;
int ret;
if (!BN_nnmod(r, a, m, ctx)) {
return 0;
}
if (m->neg) {
abs_m = BN_dup(m);
if (abs_m == NULL) {
return 0;
}
abs_m->neg = 0;
}
ret = bn_mod_lshift_consttime(r, r, n, (abs_m ? abs_m : m), ctx);
BN_free(abs_m);
return ret;
}
int bn_mod_lshift_consttime(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m,
BN_CTX *ctx) {
if (!BN_copy(r, a)) {
return 0;
}
for (int i = 0; i < n; i++) {
if (!bn_mod_lshift1_consttime(r, r, m, ctx)) {
return 0;
}
}
return 1;
}
int BN_mod_lshift_quick(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m) {
BN_CTX *ctx = BN_CTX_new();
int ok = ctx != NULL &&
bn_mod_lshift_consttime(r, a, n, m, ctx);
BN_CTX_free(ctx);
return ok;
}
int BN_mod_lshift1(BIGNUM *r, const BIGNUM *a, const BIGNUM *m, BN_CTX *ctx) {
if (!BN_lshift1(r, a)) {
return 0;
}
return BN_nnmod(r, r, m, ctx);
}
int bn_mod_lshift1_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *m,
BN_CTX *ctx) {
return bn_mod_add_consttime(r, a, a, m, ctx);
}
int BN_mod_lshift1_quick(BIGNUM *r, const BIGNUM *a, const BIGNUM *m) {
BN_CTX *ctx = BN_CTX_new();
int ok = ctx != NULL &&
bn_mod_lshift1_consttime(r, a, m, ctx);
BN_CTX_free(ctx);
return ok;
}
BN_ULONG BN_div_word(BIGNUM *a, BN_ULONG w) {
BN_ULONG ret = 0;
int i, j;
if (!w) {
// actually this is an error (division by zero)
return (BN_ULONG) - 1;
}
if (a->width == 0) {
return 0;
}
// normalize input for |bn_div_rem_words|.
j = BN_BITS2 - BN_num_bits_word(w);
w <<= j;
if (!BN_lshift(a, a, j)) {
return (BN_ULONG) - 1;
}
for (i = a->width - 1; i >= 0; i--) {
BN_ULONG l = a->d[i];
BN_ULONG d;
BN_ULONG unused_rem;
bn_div_rem_words(&d, &unused_rem, ret, l, w);
ret = l - (d * w);
a->d[i] = d;
}
bn_set_minimal_width(a);
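// Undo the normalization: (2^j * a) / (2^j * w) has the same quotient as
// a / w, while the remainder is scaled by 2^j, so shift it back down.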
ret >>= j;
return ret;
}
BN_ULONG BN_mod_word(const BIGNUM *a, BN_ULONG w) {
#ifndef BN_CAN_DIVIDE_ULLONG
BN_ULONG ret = 0;
#else
BN_ULLONG ret = 0;
#endif
int i;
if (w == 0) {
return (BN_ULONG) -1;
}
#ifndef BN_CAN_DIVIDE_ULLONG
// If |w| is too long and we don't have |BN_ULLONG| division then we need to
// fall back to using |BN_div_word|.
if (w > ((BN_ULONG)1 << BN_BITS4)) {
BIGNUM *tmp = BN_dup(a);
if (tmp == NULL) {
return (BN_ULONG)-1;
}
ret = BN_div_word(tmp, w);
BN_free(tmp);
return ret;
}
#endif
for (i = a->width - 1; i >= 0; i--) {
#ifndef BN_CAN_DIVIDE_ULLONG
ret = ((ret << BN_BITS4) | ((a->d[i] >> BN_BITS4) & BN_MASK2l)) % w;
ret = ((ret << BN_BITS4) | (a->d[i] & BN_MASK2l)) % w;
#else
ret = (BN_ULLONG)(((ret << (BN_ULLONG)BN_BITS2) | a->d[i]) % (BN_ULLONG)w);
#endif
}
return (BN_ULONG)ret;
}
int BN_mod_pow2(BIGNUM *r, const BIGNUM *a, size_t e) {
if (e == 0 || a->width == 0) {
BN_zero(r);
return 1;
}
size_t num_words = 1 + ((e - 1) / BN_BITS2);
// If |a| definitely has less than |e| bits, just BN_copy.
if ((size_t) a->width < num_words) {
return BN_copy(r, a) != NULL;
}
// Otherwise, first make sure we have enough space in |r|.
// Note that this will fail if num_words > INT_MAX.
if (!bn_wexpand(r, num_words)) {
return 0;
}
// Copy the content of |a| into |r|.
OPENSSL_memcpy(r->d, a->d, num_words * sizeof(BN_ULONG));
// If |e| isn't word-aligned, we have to mask off some of our bits.
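// For example, with |e| = 5 only the low five bits survive, so 109 = 0b1101101
// becomes 0b01101 = 13, which is 109 mod 32.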
size_t top_word_exponent = e % (sizeof(BN_ULONG) * 8);
if (top_word_exponent != 0) {
r->d[num_words - 1] &= (((BN_ULONG) 1) << top_word_exponent) - 1;
}
// Fill in the remaining fields of |r|.
r->neg = a->neg;
r->width = (int) num_words;
bn_set_minimal_width(r);
return 1;
}
int BN_nnmod_pow2(BIGNUM *r, const BIGNUM *a, size_t e) {
if (!BN_mod_pow2(r, a, e)) {
return 0;
}
// If the returned value was non-negative, we're done.
if (BN_is_zero(r) || !r->neg) {
return 1;
}
size_t num_words = 1 + (e - 1) / BN_BITS2;
// Expand |r| to the size of our modulus.
if (!bn_wexpand(r, num_words)) {
return 0;
}
// Clear the upper words of |r|.
OPENSSL_memset(&r->d[r->width], 0, (num_words - r->width) * BN_BYTES);
// Set parameters of |r|.
r->neg = 0;
r->width = (int) num_words;
// Now, invert every word. The idea here is that we want to compute 2^e-|x|,
// which is actually equivalent to the twos-complement representation of |x|
// in |e| bits, which is -x = ~x + 1.
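// For example, with |e| = 4 and |x| = 3, inverting 0b0011 gives 0b1100 = 12,
// and the final addition gives 13 = 2^4 - 3, the non-negative value of
// -3 mod 16.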
for (int i = 0; i < r->width; i++) {
r->d[i] = ~r->d[i];
}
// If our exponent doesn't span the top word, we have to mask the rest.
size_t top_word_exponent = e % BN_BITS2;
if (top_word_exponent != 0) {
r->d[r->width - 1] &= (((BN_ULONG) 1) << top_word_exponent) - 1;
}
// Keep the minimal-width invariant for |BIGNUM|.
bn_set_minimal_width(r);
// Finally, add one, for the reason described above.
return BN_add(r, r, BN_value_one());
}

View File

@@ -0,0 +1,87 @@
/* Copyright (c) 2018, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include "internal.h"
// The following functions use a Barrett reduction variant to avoid leaking the
// numerator. See http://ridiculousfish.com/blog/posts/labor-of-division-episode-i.html
//
// We use 32-bit numerator and 16-bit divisor for simplicity. This allows
// computing |m| and |q| without architecture-specific code.
// mod_u16 returns |n| mod |d|. |p| and |m| are the "magic numbers" for |d| (see
// reference). For proof of correctness in Coq, see
// https://github.com/davidben/fiat-crypto/blob/barrett/src/Arithmetic/BarrettReduction/RidiculousFish.v
// Note the Coq version of |mod_u16| additionally includes the computation of
// |p| and |m| from |bn_mod_u16_consttime| below.
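//
// As a worked example with the values |bn_mod_u16_consttime| below would
// compute for d = 7: p = 3 and m = 613566757 (the magic number truncated to 32
// bits). For n = 100, q = (m*n) >> 32 = 14, t = ((100 - 14) >> 1) + 14 = 57,
// 57 >> (p - 1) = 14, and 100 - 7*14 = 2 = 100 mod 7.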
static uint16_t mod_u16(uint32_t n, uint16_t d, uint32_t p, uint32_t m) {
// Compute floor(n/d) per steps 3 through 5.
uint32_t q = ((uint64_t)m * n) >> 32;
// Note there is a typo in the reference. We right-shift by one, not two.
uint32_t t = ((n - q) >> 1) + q;
t = t >> (p - 1);
// Multiply and subtract to get the remainder.
n -= d * t;
assert(n < d);
return n;
}
// shift_and_add_mod_u16 returns |r| * 2^32 + |a| mod |d|. |p| and |m| are the
// "magic numbers" for |d| (see reference).
static uint16_t shift_and_add_mod_u16(uint16_t r, uint32_t a, uint16_t d,
uint32_t p, uint32_t m) {
// Incorporate |a| in two 16-bit chunks.
uint32_t t = r;
t <<= 16;
t |= a >> 16;
t = mod_u16(t, d, p, m);
t <<= 16;
t |= a & 0xffff;
t = mod_u16(t, d, p, m);
return t;
}
uint16_t bn_mod_u16_consttime(const BIGNUM *bn, uint16_t d) {
if (d <= 1) {
return 0;
}
// Compute the "magic numbers" for |d|. See steps 1 and 2.
// This computes p = ceil(log_2(d)).
uint32_t p = BN_num_bits_word(d - 1);
// This operation is not constant-time, but |p| and |d| are public values.
// Note that |p| is at most 16, so the computation fits in |uint64_t|.
assert(p <= 16);
uint32_t m = ((UINT64_C(1) << (32 + p)) + d - 1) / d;
uint16_t ret = 0;
for (int i = bn->width - 1; i >= 0; i--) {
#if BN_BITS2 == 32
ret = shift_and_add_mod_u16(ret, bn->d[i], d, p, m);
#elif BN_BITS2 == 64
ret = shift_and_add_mod_u16(ret, bn->d[i] >> 32, d, p, m);
ret = shift_and_add_mod_u16(ret, bn->d[i] & 0xffffffff, d, p, m);
#else
#error "Unknown BN_ULONG size"
#endif
}
return ret;
}

File diff suppressed because it is too large

View File

@@ -0,0 +1,378 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/
/* ====================================================================
* Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_bn.h>
#include <CBigNumBoringSSL_err.h>
#include "internal.h"
int BN_mod_inverse_odd(BIGNUM *out, int *out_no_inverse, const BIGNUM *a,
const BIGNUM *n, BN_CTX *ctx) {
*out_no_inverse = 0;
if (!BN_is_odd(n)) {
OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
return 0;
}
if (BN_is_negative(a) || BN_cmp(a, n) >= 0) {
OPENSSL_PUT_ERROR(BN, BN_R_INPUT_NOT_REDUCED);
return 0;
}
BIGNUM *A, *B, *X, *Y;
int ret = 0;
int sign;
BN_CTX_start(ctx);
A = BN_CTX_get(ctx);
B = BN_CTX_get(ctx);
X = BN_CTX_get(ctx);
Y = BN_CTX_get(ctx);
if (Y == NULL) {
goto err;
}
BIGNUM *R = out;
BN_zero(Y);
if (!BN_one(X) || BN_copy(B, a) == NULL || BN_copy(A, n) == NULL) {
goto err;
}
A->neg = 0;
sign = -1;
// From B = a mod |n|, A = |n| it follows that
//
// 0 <= B < A,
// -sign*X*a == B (mod |n|),
// sign*Y*a == A (mod |n|).
// Binary inversion algorithm; requires odd modulus. This is faster than the
// general algorithm if the modulus is sufficiently small (about 400 .. 500
// bits on 32-bit systems, but much more on 64-bit systems)
int shift;
while (!BN_is_zero(B)) {
// 0 < B < |n|,
// 0 < A <= |n|,
// (1) -sign*X*a == B (mod |n|),
// (2) sign*Y*a == A (mod |n|)
// Now divide B by the maximum possible power of two in the integers,
// and divide X by the same value mod |n|.
// When we're done, (1) still holds.
shift = 0;
while (!BN_is_bit_set(B, shift)) {
// note that 0 < B
shift++;
if (BN_is_odd(X)) {
if (!BN_uadd(X, X, n)) {
goto err;
}
}
// now X is even, so we can easily divide it by two
if (!BN_rshift1(X, X)) {
goto err;
}
}
if (shift > 0) {
if (!BN_rshift(B, B, shift)) {
goto err;
}
}
// Same for A and Y. Afterwards, (2) still holds.
shift = 0;
while (!BN_is_bit_set(A, shift)) {
// note that 0 < A
shift++;
if (BN_is_odd(Y)) {
if (!BN_uadd(Y, Y, n)) {
goto err;
}
}
// now Y is even
if (!BN_rshift1(Y, Y)) {
goto err;
}
}
if (shift > 0) {
if (!BN_rshift(A, A, shift)) {
goto err;
}
}
// We still have (1) and (2).
// Both A and B are odd.
// The following computations ensure that
//
// 0 <= B < |n|,
// 0 < A < |n|,
// (1) -sign*X*a == B (mod |n|),
// (2) sign*Y*a == A (mod |n|),
//
// and that either A or B is even in the next iteration.
if (BN_ucmp(B, A) >= 0) {
// -sign*(X + Y)*a == B - A (mod |n|)
if (!BN_uadd(X, X, Y)) {
goto err;
}
// NB: we could use BN_mod_add_quick(X, X, Y, n), but that
// actually makes the algorithm slower
if (!BN_usub(B, B, A)) {
goto err;
}
} else {
// sign*(X + Y)*a == A - B (mod |n|)
if (!BN_uadd(Y, Y, X)) {
goto err;
}
// as above, BN_mod_add_quick(Y, Y, X, n) would slow things down
if (!BN_usub(A, A, B)) {
goto err;
}
}
}
if (!BN_is_one(A)) {
*out_no_inverse = 1;
OPENSSL_PUT_ERROR(BN, BN_R_NO_INVERSE);
goto err;
}
// The while loop (Euclid's algorithm) ends when
// A == gcd(a,n);
// we have
// sign*Y*a == A (mod |n|),
// where Y is non-negative.
if (sign < 0) {
if (!BN_sub(Y, n, Y)) {
goto err;
}
}
// Now Y*a == A (mod |n|).
// Y*a == 1 (mod |n|)
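// For example, a = 3 and |n| = 7 results in R = 5, and indeed 3*5 = 15 == 1
// (mod 7).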
if (!Y->neg && BN_ucmp(Y, n) < 0) {
if (!BN_copy(R, Y)) {
goto err;
}
} else {
if (!BN_nnmod(R, Y, n, ctx)) {
goto err;
}
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
BIGNUM *BN_mod_inverse(BIGNUM *out, const BIGNUM *a, const BIGNUM *n,
BN_CTX *ctx) {
BIGNUM *new_out = NULL;
if (out == NULL) {
new_out = BN_new();
if (new_out == NULL) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
return NULL;
}
out = new_out;
}
int ok = 0;
BIGNUM *a_reduced = NULL;
if (a->neg || BN_ucmp(a, n) >= 0) {
a_reduced = BN_dup(a);
if (a_reduced == NULL) {
goto err;
}
if (!BN_nnmod(a_reduced, a_reduced, n, ctx)) {
goto err;
}
a = a_reduced;
}
int no_inverse;
if (!BN_is_odd(n)) {
if (!bn_mod_inverse_consttime(out, &no_inverse, a, n, ctx)) {
goto err;
}
} else if (!BN_mod_inverse_odd(out, &no_inverse, a, n, ctx)) {
goto err;
}
ok = 1;
err:
if (!ok) {
BN_free(new_out);
out = NULL;
}
BN_free(a_reduced);
return out;
}
int BN_mod_inverse_blinded(BIGNUM *out, int *out_no_inverse, const BIGNUM *a,
const BN_MONT_CTX *mont, BN_CTX *ctx) {
*out_no_inverse = 0;
if (BN_is_negative(a) || BN_cmp(a, &mont->N) >= 0) {
OPENSSL_PUT_ERROR(BN, BN_R_INPUT_NOT_REDUCED);
return 0;
}
int ret = 0;
BIGNUM blinding_factor;
BN_init(&blinding_factor);
if (!BN_rand_range_ex(&blinding_factor, 1, &mont->N) ||
!BN_mod_mul_montgomery(out, &blinding_factor, a, mont, ctx) ||
!BN_mod_inverse_odd(out, out_no_inverse, out, &mont->N, ctx) ||
!BN_mod_mul_montgomery(out, &blinding_factor, out, mont, ctx)) {
OPENSSL_PUT_ERROR(BN, ERR_R_BN_LIB);
goto err;
}
ret = 1;
err:
BN_free(&blinding_factor);
return ret;
}
int bn_mod_inverse_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
BN_CTX *ctx, const BN_MONT_CTX *mont_p) {
BN_CTX_start(ctx);
BIGNUM *p_minus_2 = BN_CTX_get(ctx);
int ok = p_minus_2 != NULL &&
BN_copy(p_minus_2, p) &&
BN_sub_word(p_minus_2, 2) &&
BN_mod_exp_mont(out, a, p_minus_2, p, ctx, mont_p);
BN_CTX_end(ctx);
return ok;
}
int bn_mod_inverse_secret_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
BN_CTX *ctx, const BN_MONT_CTX *mont_p) {
BN_CTX_start(ctx);
BIGNUM *p_minus_2 = BN_CTX_get(ctx);
int ok = p_minus_2 != NULL &&
BN_copy(p_minus_2, p) &&
BN_sub_word(p_minus_2, 2) &&
BN_mod_exp_mont_consttime(out, a, p_minus_2, p, ctx, mont_p);
BN_CTX_end(ctx);
return ok;
}

View File

@@ -0,0 +1,325 @@
/* Copyright (c) 2018, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include <CBigNumBoringSSL_err.h>
#include "internal.h"
static BN_ULONG word_is_odd_mask(BN_ULONG a) { return (BN_ULONG)0 - (a & 1); }
static void maybe_rshift1_words(BN_ULONG *a, BN_ULONG mask, BN_ULONG *tmp,
size_t num) {
bn_rshift1_words(tmp, a, num);
bn_select_words(a, mask, tmp, a, num);
}
static void maybe_rshift1_words_carry(BN_ULONG *a, BN_ULONG carry,
BN_ULONG mask, BN_ULONG *tmp,
size_t num) {
maybe_rshift1_words(a, mask, tmp, num);
if (num != 0) {
carry &= mask;
a[num - 1] |= carry << (BN_BITS2-1);
}
}
static BN_ULONG maybe_add_words(BN_ULONG *a, BN_ULONG mask, const BN_ULONG *b,
BN_ULONG *tmp, size_t num) {
BN_ULONG carry = bn_add_words(tmp, a, b, num);
bn_select_words(a, mask, tmp, a, num);
return carry & mask;
}
static int bn_gcd_consttime(BIGNUM *r, unsigned *out_shift, const BIGNUM *x,
const BIGNUM *y, BN_CTX *ctx) {
size_t width = x->width > y->width ? x->width : y->width;
if (width == 0) {
*out_shift = 0;
BN_zero(r);
return 1;
}
// This is a constant-time implementation of Stein's algorithm (binary GCD).
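// For illustration, x = 12 and y = 18 share one factor of two, recorded in
// |shift|, while the pair passes through (6, 9), (3, 9), (3, 6), (3, 3) and
// (0, 3); the function then sets |r| = 3 and |*out_shift| = 1, and |BN_gcd|
// left-shifts to obtain gcd(12, 18) = 6.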
int ret = 0;
BN_CTX_start(ctx);
BIGNUM *u = BN_CTX_get(ctx);
BIGNUM *v = BN_CTX_get(ctx);
BIGNUM *tmp = BN_CTX_get(ctx);
if (u == NULL || v == NULL || tmp == NULL ||
!BN_copy(u, x) ||
!BN_copy(v, y) ||
!bn_resize_words(u, width) ||
!bn_resize_words(v, width) ||
!bn_resize_words(tmp, width)) {
goto err;
}
// Each loop iteration halves at least one of |u| and |v|. Thus we need at
// most the combined bit width of inputs for at least one value to be zero.
unsigned x_bits = x->width * BN_BITS2, y_bits = y->width * BN_BITS2;
unsigned num_iters = x_bits + y_bits;
if (num_iters < x_bits) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
goto err;
}
unsigned shift = 0;
for (unsigned i = 0; i < num_iters; i++) {
BN_ULONG both_odd = word_is_odd_mask(u->d[0]) & word_is_odd_mask(v->d[0]);
// If both |u| and |v| are odd, subtract the smaller from the larger.
BN_ULONG u_less_than_v =
(BN_ULONG)0 - bn_sub_words(tmp->d, u->d, v->d, width);
bn_select_words(u->d, both_odd & ~u_less_than_v, tmp->d, u->d, width);
bn_sub_words(tmp->d, v->d, u->d, width);
bn_select_words(v->d, both_odd & u_less_than_v, tmp->d, v->d, width);
// At least one of |u| and |v| is now even.
BN_ULONG u_is_odd = word_is_odd_mask(u->d[0]);
BN_ULONG v_is_odd = word_is_odd_mask(v->d[0]);
assert(!(u_is_odd & v_is_odd));
// If both are even, the final GCD gains a factor of two.
shift += 1 & (~u_is_odd & ~v_is_odd);
// Halve any which are even.
maybe_rshift1_words(u->d, ~u_is_odd, tmp->d, width);
maybe_rshift1_words(v->d, ~v_is_odd, tmp->d, width);
}
// One of |u| or |v| is zero at this point. The algorithm usually makes |u|
// zero, unless |y| was already zero on input. Fix this by combining the
// values.
assert(BN_is_zero(u) || BN_is_zero(v));
for (size_t i = 0; i < width; i++) {
v->d[i] |= u->d[i];
}
*out_shift = shift;
ret = bn_set_words(r, v->d, width);
err:
BN_CTX_end(ctx);
return ret;
}
int BN_gcd(BIGNUM *r, const BIGNUM *x, const BIGNUM *y, BN_CTX *ctx) {
unsigned shift;
return bn_gcd_consttime(r, &shift, x, y, ctx) &&
BN_lshift(r, r, shift);
}
int bn_is_relatively_prime(int *out_relatively_prime, const BIGNUM *x,
const BIGNUM *y, BN_CTX *ctx) {
int ret = 0;
BN_CTX_start(ctx);
unsigned shift;
BIGNUM *gcd = BN_CTX_get(ctx);
if (gcd == NULL ||
!bn_gcd_consttime(gcd, &shift, x, y, ctx)) {
goto err;
}
// Check that 2^|shift| * |gcd| is one.
if (gcd->width == 0) {
*out_relatively_prime = 0;
} else {
BN_ULONG mask = shift | (gcd->d[0] ^ 1);
for (int i = 1; i < gcd->width; i++) {
mask |= gcd->d[i];
}
*out_relatively_prime = mask == 0;
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
int bn_lcm_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
BN_CTX_start(ctx);
unsigned shift;
BIGNUM *gcd = BN_CTX_get(ctx);
int ret = gcd != NULL &&
bn_mul_consttime(r, a, b, ctx) &&
bn_gcd_consttime(gcd, &shift, a, b, ctx) &&
bn_div_consttime(r, NULL, r, gcd, ctx) &&
bn_rshift_secret_shift(r, r, shift, ctx);
BN_CTX_end(ctx);
return ret;
}
int bn_mod_inverse_consttime(BIGNUM *r, int *out_no_inverse, const BIGNUM *a,
const BIGNUM *n, BN_CTX *ctx) {
*out_no_inverse = 0;
if (BN_is_negative(a) || BN_ucmp(a, n) >= 0) {
OPENSSL_PUT_ERROR(BN, BN_R_INPUT_NOT_REDUCED);
return 0;
}
if (BN_is_zero(a)) {
if (BN_is_one(n)) {
BN_zero(r);
return 1;
}
*out_no_inverse = 1;
OPENSSL_PUT_ERROR(BN, BN_R_NO_INVERSE);
return 0;
}
// This is a constant-time implementation of the extended binary GCD
// algorithm. It is adapted from the Handbook of Applied Cryptography, section
// 14.4.3, algorithm 14.51, and modified to bound coefficients and avoid
// negative numbers.
//
// For more details and proof of correctness, see
// https://github.com/mit-plv/fiat-crypto/pull/333. In particular, see |step|
// and |mod_inverse_consttime| for the algorithm in Gallina and see
// |mod_inverse_consttime_spec| for the correctness result.
if (!BN_is_odd(a) && !BN_is_odd(n)) {
*out_no_inverse = 1;
OPENSSL_PUT_ERROR(BN, BN_R_NO_INVERSE);
return 0;
}
// This function exists to compute the RSA private exponent, where |a| is one
// word. We'll thus use |a_width| when available.
size_t n_width = n->width, a_width = a->width;
if (a_width > n_width) {
a_width = n_width;
}
int ret = 0;
BN_CTX_start(ctx);
BIGNUM *u = BN_CTX_get(ctx);
BIGNUM *v = BN_CTX_get(ctx);
BIGNUM *A = BN_CTX_get(ctx);
BIGNUM *B = BN_CTX_get(ctx);
BIGNUM *C = BN_CTX_get(ctx);
BIGNUM *D = BN_CTX_get(ctx);
BIGNUM *tmp = BN_CTX_get(ctx);
BIGNUM *tmp2 = BN_CTX_get(ctx);
if (u == NULL || v == NULL || A == NULL || B == NULL || C == NULL ||
D == NULL || tmp == NULL || tmp2 == NULL ||
!BN_copy(u, a) ||
!BN_copy(v, n) ||
!BN_one(A) ||
!BN_one(D) ||
// For convenience, size |u| and |v| equivalently.
!bn_resize_words(u, n_width) ||
!bn_resize_words(v, n_width) ||
// |A| and |C| are bounded by |m|.
!bn_resize_words(A, n_width) ||
!bn_resize_words(C, n_width) ||
// |B| and |D| are bounded by |a|.
!bn_resize_words(B, a_width) ||
!bn_resize_words(D, a_width) ||
// |tmp| and |tmp2| may be used at either size.
!bn_resize_words(tmp, n_width) ||
!bn_resize_words(tmp2, n_width)) {
goto err;
}
// Each loop iteration halves at least one of |u| and |v|. Thus we need at
// most the combined bit width of inputs for at least one value to be zero.
unsigned a_bits = a_width * BN_BITS2, n_bits = n_width * BN_BITS2;
unsigned num_iters = a_bits + n_bits;
if (num_iters < a_bits) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
goto err;
}
// Before and after each loop iteration, the following hold:
//
// u = A*a - B*n
// v = D*n - C*a
// 0 < u <= a
// 0 <= v <= n
// 0 <= A < n
// 0 <= B <= a
// 0 <= C < n
// 0 <= D <= a
//
// After each loop iteration, u and v only get smaller, and at least one of
// them shrinks by at least a factor of two.
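//
// For example, a = 3 and n = 10 start from u = 3, v = 10, A = 1, B = 0,
// C = 0, D = 1, which satisfies u = A*a - B*n and v = D*n - C*a; the loop ends
// with u = 1 and A = 7, and indeed 3*7 = 21 == 1 (mod 10).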
for (unsigned i = 0; i < num_iters; i++) {
BN_ULONG both_odd = word_is_odd_mask(u->d[0]) & word_is_odd_mask(v->d[0]);
// If both |u| and |v| are odd, subtract the smaller from the larger.
BN_ULONG v_less_than_u =
(BN_ULONG)0 - bn_sub_words(tmp->d, v->d, u->d, n_width);
bn_select_words(v->d, both_odd & ~v_less_than_u, tmp->d, v->d, n_width);
bn_sub_words(tmp->d, u->d, v->d, n_width);
bn_select_words(u->d, both_odd & v_less_than_u, tmp->d, u->d, n_width);
// If we updated one of the values, update the corresponding coefficient.
BN_ULONG carry = bn_add_words(tmp->d, A->d, C->d, n_width);
carry -= bn_sub_words(tmp2->d, tmp->d, n->d, n_width);
bn_select_words(tmp->d, carry, tmp->d, tmp2->d, n_width);
bn_select_words(A->d, both_odd & v_less_than_u, tmp->d, A->d, n_width);
bn_select_words(C->d, both_odd & ~v_less_than_u, tmp->d, C->d, n_width);
bn_add_words(tmp->d, B->d, D->d, a_width);
bn_sub_words(tmp2->d, tmp->d, a->d, a_width);
bn_select_words(tmp->d, carry, tmp->d, tmp2->d, a_width);
bn_select_words(B->d, both_odd & v_less_than_u, tmp->d, B->d, a_width);
bn_select_words(D->d, both_odd & ~v_less_than_u, tmp->d, D->d, a_width);
// Our loop invariants hold at this point. Additionally, exactly one of |u|
// and |v| is now even.
BN_ULONG u_is_even = ~word_is_odd_mask(u->d[0]);
BN_ULONG v_is_even = ~word_is_odd_mask(v->d[0]);
assert(u_is_even != v_is_even);
// Halve the even one and adjust the corresponding coefficient.
maybe_rshift1_words(u->d, u_is_even, tmp->d, n_width);
BN_ULONG A_or_B_is_odd =
word_is_odd_mask(A->d[0]) | word_is_odd_mask(B->d[0]);
BN_ULONG A_carry =
maybe_add_words(A->d, A_or_B_is_odd & u_is_even, n->d, tmp->d, n_width);
BN_ULONG B_carry =
maybe_add_words(B->d, A_or_B_is_odd & u_is_even, a->d, tmp->d, a_width);
maybe_rshift1_words_carry(A->d, A_carry, u_is_even, tmp->d, n_width);
maybe_rshift1_words_carry(B->d, B_carry, u_is_even, tmp->d, a_width);
maybe_rshift1_words(v->d, v_is_even, tmp->d, n_width);
BN_ULONG C_or_D_is_odd =
word_is_odd_mask(C->d[0]) | word_is_odd_mask(D->d[0]);
BN_ULONG C_carry =
maybe_add_words(C->d, C_or_D_is_odd & v_is_even, n->d, tmp->d, n_width);
BN_ULONG D_carry =
maybe_add_words(D->d, C_or_D_is_odd & v_is_even, a->d, tmp->d, a_width);
maybe_rshift1_words_carry(C->d, C_carry, v_is_even, tmp->d, n_width);
maybe_rshift1_words_carry(D->d, D_carry, v_is_even, tmp->d, a_width);
}
assert(BN_is_zero(v));
if (!BN_is_one(u)) {
*out_no_inverse = 1;
OPENSSL_PUT_ERROR(BN, BN_R_NO_INVERSE);
goto err;
}
ret = BN_copy(r, A) != NULL;
err:
BN_CTX_end(ctx);
return ret;
}

View File

@@ -0,0 +1,711 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include "internal.h"
// This file has two other implementations: x86 assembly language in
// asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c.
#if defined(OPENSSL_NO_ASM) || \
!(defined(OPENSSL_X86) || \
(defined(OPENSSL_X86_64) && (defined(__GNUC__) || defined(__clang__))))
#ifdef BN_ULLONG
#define mul_add(r, a, w, c) \
do { \
BN_ULLONG t; \
t = (BN_ULLONG)(w) * (a) + (r) + (c); \
(r) = Lw(t); \
(c) = Hw(t); \
} while (0)
#define mul(r, a, w, c) \
do { \
BN_ULLONG t; \
t = (BN_ULLONG)(w) * (a) + (c); \
(r) = Lw(t); \
(c) = Hw(t); \
} while (0)
#define sqr(r0, r1, a) \
do { \
BN_ULLONG t; \
t = (BN_ULLONG)(a) * (a); \
(r0) = Lw(t); \
(r1) = Hw(t); \
} while (0)
#else
#define mul_add(r, a, w, c) \
do { \
BN_ULONG high, low, ret, tmp = (a); \
ret = (r); \
BN_UMULT_LOHI(low, high, w, tmp); \
ret += (c); \
(c) = (ret < (c)) ? 1 : 0; \
(c) += high; \
ret += low; \
(c) += (ret < low) ? 1 : 0; \
(r) = ret; \
} while (0)
#define mul(r, a, w, c) \
do { \
BN_ULONG high, low, ret, ta = (a); \
BN_UMULT_LOHI(low, high, w, ta); \
ret = low + (c); \
(c) = high; \
(c) += (ret < low) ? 1 : 0; \
(r) = ret; \
} while (0)
#define sqr(r0, r1, a) \
do { \
BN_ULONG tmp = (a); \
BN_UMULT_LOHI(r0, r1, tmp, tmp); \
} while (0)
#endif // !BN_ULLONG
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
BN_ULONG w) {
BN_ULONG c1 = 0;
if (num == 0) {
return c1;
}
while (num & ~3) {
mul_add(rp[0], ap[0], w, c1);
mul_add(rp[1], ap[1], w, c1);
mul_add(rp[2], ap[2], w, c1);
mul_add(rp[3], ap[3], w, c1);
ap += 4;
rp += 4;
num -= 4;
}
while (num) {
mul_add(rp[0], ap[0], w, c1);
ap++;
rp++;
num--;
}
return c1;
}
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
BN_ULONG w) {
BN_ULONG c1 = 0;
if (num == 0) {
return c1;
}
while (num & ~3) {
mul(rp[0], ap[0], w, c1);
mul(rp[1], ap[1], w, c1);
mul(rp[2], ap[2], w, c1);
mul(rp[3], ap[3], w, c1);
ap += 4;
rp += 4;
num -= 4;
}
while (num) {
mul(rp[0], ap[0], w, c1);
ap++;
rp++;
num--;
}
return c1;
}
void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, size_t n) {
if (n == 0) {
return;
}
while (n & ~3) {
sqr(r[0], r[1], a[0]);
sqr(r[2], r[3], a[1]);
sqr(r[4], r[5], a[2]);
sqr(r[6], r[7], a[3]);
a += 4;
r += 8;
n -= 4;
}
while (n) {
sqr(r[0], r[1], a[0]);
a++;
r += 2;
n--;
}
}
#ifdef BN_ULLONG
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
size_t n) {
BN_ULLONG ll = 0;
if (n == 0) {
return 0;
}
while (n & ~3) {
ll += (BN_ULLONG)a[0] + b[0];
r[0] = (BN_ULONG)ll;
ll >>= BN_BITS2;
ll += (BN_ULLONG)a[1] + b[1];
r[1] = (BN_ULONG)ll;
ll >>= BN_BITS2;
ll += (BN_ULLONG)a[2] + b[2];
r[2] = (BN_ULONG)ll;
ll >>= BN_BITS2;
ll += (BN_ULLONG)a[3] + b[3];
r[3] = (BN_ULONG)ll;
ll >>= BN_BITS2;
a += 4;
b += 4;
r += 4;
n -= 4;
}
while (n) {
ll += (BN_ULLONG)a[0] + b[0];
r[0] = (BN_ULONG)ll;
ll >>= BN_BITS2;
a++;
b++;
r++;
n--;
}
return (BN_ULONG)ll;
}
#else // !BN_ULLONG
BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
size_t n) {
BN_ULONG c, l, t;
if (n == 0) {
return (BN_ULONG)0;
}
c = 0;
while (n & ~3) {
t = a[0];
t += c;
c = (t < c);
l = t + b[0];
c += (l < t);
r[0] = l;
t = a[1];
t += c;
c = (t < c);
l = t + b[1];
c += (l < t);
r[1] = l;
t = a[2];
t += c;
c = (t < c);
l = t + b[2];
c += (l < t);
r[2] = l;
t = a[3];
t += c;
c = (t < c);
l = t + b[3];
c += (l < t);
r[3] = l;
a += 4;
b += 4;
r += 4;
n -= 4;
}
while (n) {
t = a[0];
t += c;
c = (t < c);
l = t + b[0];
c += (l < t);
r[0] = l;
a++;
b++;
r++;
n--;
}
return (BN_ULONG)c;
}
#endif // !BN_ULLONG
BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
size_t n) {
BN_ULONG t1, t2;
int c = 0;
if (n == 0) {
return (BN_ULONG)0;
}
while (n & ~3) {
t1 = a[0];
t2 = b[0];
r[0] = t1 - t2 - c;
if (t1 != t2) {
c = (t1 < t2);
}
t1 = a[1];
t2 = b[1];
r[1] = t1 - t2 - c;
if (t1 != t2) {
c = (t1 < t2);
}
t1 = a[2];
t2 = b[2];
r[2] = t1 - t2 - c;
if (t1 != t2) {
c = (t1 < t2);
}
t1 = a[3];
t2 = b[3];
r[3] = t1 - t2 - c;
if (t1 != t2) {
c = (t1 < t2);
}
a += 4;
b += 4;
r += 4;
n -= 4;
}
while (n) {
t1 = a[0];
t2 = b[0];
r[0] = t1 - t2 - c;
if (t1 != t2) {
c = (t1 < t2);
}
a++;
b++;
r++;
n--;
}
return c;
}
// mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0)
// mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0)
// sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0)
// sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)
#ifdef BN_ULLONG
// Keep in mind that additions to multiplication result can not overflow,
// because its high half cannot be all-ones.
#define mul_add_c(a, b, c0, c1, c2) \
do { \
BN_ULONG hi; \
BN_ULLONG t = (BN_ULLONG)(a) * (b); \
t += (c0); /* no carry */ \
(c0) = (BN_ULONG)Lw(t); \
hi = (BN_ULONG)Hw(t); \
(c1) += (hi); \
if ((c1) < hi) { \
(c2)++; \
} \
} while (0)
#define mul_add_c2(a, b, c0, c1, c2) \
do { \
BN_ULONG hi; \
BN_ULLONG t = (BN_ULLONG)(a) * (b); \
BN_ULLONG tt = t + (c0); /* no carry */ \
(c0) = (BN_ULONG)Lw(tt); \
hi = (BN_ULONG)Hw(tt); \
(c1) += hi; \
if ((c1) < hi) { \
(c2)++; \
} \
t += (c0); /* no carry */ \
(c0) = (BN_ULONG)Lw(t); \
hi = (BN_ULONG)Hw(t); \
(c1) += hi; \
if ((c1) < hi) { \
(c2)++; \
} \
} while (0)
#define sqr_add_c(a, i, c0, c1, c2) \
do { \
BN_ULONG hi; \
BN_ULLONG t = (BN_ULLONG)(a)[i] * (a)[i]; \
t += (c0); /* no carry */ \
(c0) = (BN_ULONG)Lw(t); \
hi = (BN_ULONG)Hw(t); \
(c1) += hi; \
if ((c1) < hi) { \
(c2)++; \
} \
} while (0)
#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
#else
// Keep in mind that additions to hi can not overflow, because the high word of
// a multiplication result cannot be all-ones.
#define mul_add_c(a, b, c0, c1, c2) \
do { \
BN_ULONG ta = (a), tb = (b); \
BN_ULONG lo, hi; \
BN_UMULT_LOHI(lo, hi, ta, tb); \
(c0) += lo; \
hi += ((c0) < lo) ? 1 : 0; \
(c1) += hi; \
(c2) += ((c1) < hi) ? 1 : 0; \
} while (0)
#define mul_add_c2(a, b, c0, c1, c2) \
do { \
BN_ULONG ta = (a), tb = (b); \
BN_ULONG lo, hi, tt; \
BN_UMULT_LOHI(lo, hi, ta, tb); \
(c0) += lo; \
tt = hi + (((c0) < lo) ? 1 : 0); \
(c1) += tt; \
(c2) += ((c1) < tt) ? 1 : 0; \
(c0) += lo; \
hi += (c0 < lo) ? 1 : 0; \
(c1) += hi; \
(c2) += ((c1) < hi) ? 1 : 0; \
} while (0)
#define sqr_add_c(a, i, c0, c1, c2) \
do { \
BN_ULONG ta = (a)[i]; \
BN_ULONG lo, hi; \
BN_UMULT_LOHI(lo, hi, ta, ta); \
(c0) += lo; \
hi += (c0 < lo) ? 1 : 0; \
(c1) += hi; \
(c2) += ((c1) < hi) ? 1 : 0; \
} while (0)
#define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
#endif // !BN_ULLONG
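// The comba routines below walk the result column by column: column k of the
// product is the sum of all a[i]*b[j] with i + j == k, accumulated into the
// rotating three-word carry chain (c1, c2, c3) and then written to r[k].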
void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
mul_add_c(a[0], b[0], c1, c2, c3);
r[0] = c1;
c1 = 0;
mul_add_c(a[0], b[1], c2, c3, c1);
mul_add_c(a[1], b[0], c2, c3, c1);
r[1] = c2;
c2 = 0;
mul_add_c(a[2], b[0], c3, c1, c2);
mul_add_c(a[1], b[1], c3, c1, c2);
mul_add_c(a[0], b[2], c3, c1, c2);
r[2] = c3;
c3 = 0;
mul_add_c(a[0], b[3], c1, c2, c3);
mul_add_c(a[1], b[2], c1, c2, c3);
mul_add_c(a[2], b[1], c1, c2, c3);
mul_add_c(a[3], b[0], c1, c2, c3);
r[3] = c1;
c1 = 0;
mul_add_c(a[4], b[0], c2, c3, c1);
mul_add_c(a[3], b[1], c2, c3, c1);
mul_add_c(a[2], b[2], c2, c3, c1);
mul_add_c(a[1], b[3], c2, c3, c1);
mul_add_c(a[0], b[4], c2, c3, c1);
r[4] = c2;
c2 = 0;
mul_add_c(a[0], b[5], c3, c1, c2);
mul_add_c(a[1], b[4], c3, c1, c2);
mul_add_c(a[2], b[3], c3, c1, c2);
mul_add_c(a[3], b[2], c3, c1, c2);
mul_add_c(a[4], b[1], c3, c1, c2);
mul_add_c(a[5], b[0], c3, c1, c2);
r[5] = c3;
c3 = 0;
mul_add_c(a[6], b[0], c1, c2, c3);
mul_add_c(a[5], b[1], c1, c2, c3);
mul_add_c(a[4], b[2], c1, c2, c3);
mul_add_c(a[3], b[3], c1, c2, c3);
mul_add_c(a[2], b[4], c1, c2, c3);
mul_add_c(a[1], b[5], c1, c2, c3);
mul_add_c(a[0], b[6], c1, c2, c3);
r[6] = c1;
c1 = 0;
mul_add_c(a[0], b[7], c2, c3, c1);
mul_add_c(a[1], b[6], c2, c3, c1);
mul_add_c(a[2], b[5], c2, c3, c1);
mul_add_c(a[3], b[4], c2, c3, c1);
mul_add_c(a[4], b[3], c2, c3, c1);
mul_add_c(a[5], b[2], c2, c3, c1);
mul_add_c(a[6], b[1], c2, c3, c1);
mul_add_c(a[7], b[0], c2, c3, c1);
r[7] = c2;
c2 = 0;
mul_add_c(a[7], b[1], c3, c1, c2);
mul_add_c(a[6], b[2], c3, c1, c2);
mul_add_c(a[5], b[3], c3, c1, c2);
mul_add_c(a[4], b[4], c3, c1, c2);
mul_add_c(a[3], b[5], c3, c1, c2);
mul_add_c(a[2], b[6], c3, c1, c2);
mul_add_c(a[1], b[7], c3, c1, c2);
r[8] = c3;
c3 = 0;
mul_add_c(a[2], b[7], c1, c2, c3);
mul_add_c(a[3], b[6], c1, c2, c3);
mul_add_c(a[4], b[5], c1, c2, c3);
mul_add_c(a[5], b[4], c1, c2, c3);
mul_add_c(a[6], b[3], c1, c2, c3);
mul_add_c(a[7], b[2], c1, c2, c3);
r[9] = c1;
c1 = 0;
mul_add_c(a[7], b[3], c2, c3, c1);
mul_add_c(a[6], b[4], c2, c3, c1);
mul_add_c(a[5], b[5], c2, c3, c1);
mul_add_c(a[4], b[6], c2, c3, c1);
mul_add_c(a[3], b[7], c2, c3, c1);
r[10] = c2;
c2 = 0;
mul_add_c(a[4], b[7], c3, c1, c2);
mul_add_c(a[5], b[6], c3, c1, c2);
mul_add_c(a[6], b[5], c3, c1, c2);
mul_add_c(a[7], b[4], c3, c1, c2);
r[11] = c3;
c3 = 0;
mul_add_c(a[7], b[5], c1, c2, c3);
mul_add_c(a[6], b[6], c1, c2, c3);
mul_add_c(a[5], b[7], c1, c2, c3);
r[12] = c1;
c1 = 0;
mul_add_c(a[6], b[7], c2, c3, c1);
mul_add_c(a[7], b[6], c2, c3, c1);
r[13] = c2;
c2 = 0;
mul_add_c(a[7], b[7], c3, c1, c2);
r[14] = c3;
r[15] = c1;
}
void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
mul_add_c(a[0], b[0], c1, c2, c3);
r[0] = c1;
c1 = 0;
mul_add_c(a[0], b[1], c2, c3, c1);
mul_add_c(a[1], b[0], c2, c3, c1);
r[1] = c2;
c2 = 0;
mul_add_c(a[2], b[0], c3, c1, c2);
mul_add_c(a[1], b[1], c3, c1, c2);
mul_add_c(a[0], b[2], c3, c1, c2);
r[2] = c3;
c3 = 0;
mul_add_c(a[0], b[3], c1, c2, c3);
mul_add_c(a[1], b[2], c1, c2, c3);
mul_add_c(a[2], b[1], c1, c2, c3);
mul_add_c(a[3], b[0], c1, c2, c3);
r[3] = c1;
c1 = 0;
mul_add_c(a[3], b[1], c2, c3, c1);
mul_add_c(a[2], b[2], c2, c3, c1);
mul_add_c(a[1], b[3], c2, c3, c1);
r[4] = c2;
c2 = 0;
mul_add_c(a[2], b[3], c3, c1, c2);
mul_add_c(a[3], b[2], c3, c1, c2);
r[5] = c3;
c3 = 0;
mul_add_c(a[3], b[3], c1, c2, c3);
r[6] = c1;
r[7] = c2;
}
void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
sqr_add_c(a, 0, c1, c2, c3);
r[0] = c1;
c1 = 0;
sqr_add_c2(a, 1, 0, c2, c3, c1);
r[1] = c2;
c2 = 0;
sqr_add_c(a, 1, c3, c1, c2);
sqr_add_c2(a, 2, 0, c3, c1, c2);
r[2] = c3;
c3 = 0;
sqr_add_c2(a, 3, 0, c1, c2, c3);
sqr_add_c2(a, 2, 1, c1, c2, c3);
r[3] = c1;
c1 = 0;
sqr_add_c(a, 2, c2, c3, c1);
sqr_add_c2(a, 3, 1, c2, c3, c1);
sqr_add_c2(a, 4, 0, c2, c3, c1);
r[4] = c2;
c2 = 0;
sqr_add_c2(a, 5, 0, c3, c1, c2);
sqr_add_c2(a, 4, 1, c3, c1, c2);
sqr_add_c2(a, 3, 2, c3, c1, c2);
r[5] = c3;
c3 = 0;
sqr_add_c(a, 3, c1, c2, c3);
sqr_add_c2(a, 4, 2, c1, c2, c3);
sqr_add_c2(a, 5, 1, c1, c2, c3);
sqr_add_c2(a, 6, 0, c1, c2, c3);
r[6] = c1;
c1 = 0;
sqr_add_c2(a, 7, 0, c2, c3, c1);
sqr_add_c2(a, 6, 1, c2, c3, c1);
sqr_add_c2(a, 5, 2, c2, c3, c1);
sqr_add_c2(a, 4, 3, c2, c3, c1);
r[7] = c2;
c2 = 0;
sqr_add_c(a, 4, c3, c1, c2);
sqr_add_c2(a, 5, 3, c3, c1, c2);
sqr_add_c2(a, 6, 2, c3, c1, c2);
sqr_add_c2(a, 7, 1, c3, c1, c2);
r[8] = c3;
c3 = 0;
sqr_add_c2(a, 7, 2, c1, c2, c3);
sqr_add_c2(a, 6, 3, c1, c2, c3);
sqr_add_c2(a, 5, 4, c1, c2, c3);
r[9] = c1;
c1 = 0;
sqr_add_c(a, 5, c2, c3, c1);
sqr_add_c2(a, 6, 4, c2, c3, c1);
sqr_add_c2(a, 7, 3, c2, c3, c1);
r[10] = c2;
c2 = 0;
sqr_add_c2(a, 7, 4, c3, c1, c2);
sqr_add_c2(a, 6, 5, c3, c1, c2);
r[11] = c3;
c3 = 0;
sqr_add_c(a, 6, c1, c2, c3);
sqr_add_c2(a, 7, 5, c1, c2, c3);
r[12] = c1;
c1 = 0;
sqr_add_c2(a, 7, 6, c2, c3, c1);
r[13] = c2;
c2 = 0;
sqr_add_c(a, 7, c3, c1, c2);
r[14] = c3;
r[15] = c1;
}
void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) {
BN_ULONG c1, c2, c3;
c1 = 0;
c2 = 0;
c3 = 0;
sqr_add_c(a, 0, c1, c2, c3);
r[0] = c1;
c1 = 0;
sqr_add_c2(a, 1, 0, c2, c3, c1);
r[1] = c2;
c2 = 0;
sqr_add_c(a, 1, c3, c1, c2);
sqr_add_c2(a, 2, 0, c3, c1, c2);
r[2] = c3;
c3 = 0;
sqr_add_c2(a, 3, 0, c1, c2, c3);
sqr_add_c2(a, 2, 1, c1, c2, c3);
r[3] = c1;
c1 = 0;
sqr_add_c(a, 2, c2, c3, c1);
sqr_add_c2(a, 3, 1, c2, c3, c1);
r[4] = c2;
c2 = 0;
sqr_add_c2(a, 3, 2, c3, c1, c2);
r[5] = c3;
c3 = 0;
sqr_add_c(a, 3, c1, c2, c3);
r[6] = c1;
r[7] = c2;
}
#undef mul_add
#undef mul
#undef sqr
#undef mul_add_c
#undef mul_add_c2
#undef sqr_add_c
#undef sqr_add_c2
#endif

View File

@@ -0,0 +1,694 @@
/* Copyright (C) 1995-1997 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/
/* ====================================================================
* Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com).
*
*/
/* ====================================================================
* Copyright 2002 Sun Microsystems, Inc. ALL RIGHTS RESERVED.
*
* Portions of the attached software ("Contribution") are developed by
* SUN MICROSYSTEMS, INC., and are contributed to the OpenSSL project.
*
* The Contribution is licensed pursuant to the Eric Young open source
* license provided above.
*
* The binary polynomial arithmetic software is originally written by
* Sheueling Chang Shantz and Douglas Stebila of Sun Microsystems
* Laboratories. */
#ifndef OPENSSL_HEADER_BN_INTERNAL_H
#define OPENSSL_HEADER_BN_INTERNAL_H
#include <CBigNumBoringSSL_base.h>
#if defined(OPENSSL_X86_64) && defined(_MSC_VER)
OPENSSL_MSVC_PRAGMA(warning(push, 3))
#include <intrin.h>
OPENSSL_MSVC_PRAGMA(warning(pop))
#pragma intrinsic(__umulh, _umul128)
#endif
#include "../../internal.h"
#if defined(__cplusplus)
extern "C" {
#endif
#if defined(OPENSSL_64_BIT)
#if defined(BORINGSSL_HAS_UINT128)
// MSVC doesn't support two-word integers on 64-bit.
#define BN_ULLONG uint128_t
#if defined(BORINGSSL_CAN_DIVIDE_UINT128)
#define BN_CAN_DIVIDE_ULLONG
#endif
#endif
#define BN_BITS2 64
#define BN_BYTES 8
#define BN_BITS4 32
#define BN_MASK2 (0xffffffffffffffffUL)
#define BN_MASK2l (0xffffffffUL)
#define BN_MASK2h (0xffffffff00000000UL)
#define BN_MASK2h1 (0xffffffff80000000UL)
#define BN_MONT_CTX_N0_LIMBS 1
#define BN_DEC_CONV (10000000000000000000UL)
#define BN_DEC_NUM 19
#define TOBN(hi, lo) ((BN_ULONG)(hi) << 32 | (lo))
#elif defined(OPENSSL_32_BIT)
#define BN_ULLONG uint64_t
#define BN_CAN_DIVIDE_ULLONG
#define BN_BITS2 32
#define BN_BYTES 4
#define BN_BITS4 16
#define BN_MASK2 (0xffffffffUL)
#define BN_MASK2l (0xffffUL)
#define BN_MASK2h1 (0xffff8000UL)
#define BN_MASK2h (0xffff0000UL)
// On some 32-bit platforms, Montgomery multiplication is done using 64-bit
// arithmetic with SIMD instructions. On such platforms, |BN_MONT_CTX::n0|
// needs to be two words long. Only certain 32-bit platforms actually make use
// of n0[1], and a shorter R value would suffice for the others. However,
// currently only the assembly files know which is which.
#define BN_MONT_CTX_N0_LIMBS 2
#define BN_DEC_CONV (1000000000UL)
#define BN_DEC_NUM 9
#define TOBN(hi, lo) (lo), (hi)
#else
#error "Must define either OPENSSL_32_BIT or OPENSSL_64_BIT"
#endif
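// In either case, TOBN(hi, lo) spells out the 64-bit value hi*2^32 + lo as
// BN_ULONG array initializers, least significant word first: on 64-bit it is a
// single word, on 32-bit it expands to the two words (lo), (hi).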
#if !defined(OPENSSL_NO_ASM) && (defined(__GNUC__) || defined(__clang__))
#define BN_CAN_USE_INLINE_ASM
#endif
// |BN_mod_exp_mont_consttime| is based on the assumption that the L1 data
// cache line width of the target processor is at least the following value.
#define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH 64
// The number of |BN_ULONG|s needed for the |BN_mod_exp_mont_consttime| stack-
// allocated storage buffer. The buffer is just the right size for the RSAZ
// and is about ~1KB larger than what's necessary (4480 bytes) for 1024-bit
// inputs.
#define MOD_EXP_CTIME_STORAGE_LEN \
(((320u * 3u) + (32u * 9u * 16u)) / sizeof(BN_ULONG))
#define STATIC_BIGNUM(x) \
{ \
(BN_ULONG *)(x), sizeof(x) / sizeof(BN_ULONG), \
sizeof(x) / sizeof(BN_ULONG), 0, BN_FLG_STATIC_DATA \
}
#if defined(BN_ULLONG)
#define Lw(t) ((BN_ULONG)(t))
#define Hw(t) ((BN_ULONG)((t) >> BN_BITS2))
#endif
// bn_minimal_width returns the minimal value of |bn->top| which fits the
// value of |bn|.
int bn_minimal_width(const BIGNUM *bn);
// bn_set_minimal_width sets |bn->width| to |bn_minimal_width(bn)|. If |bn| is
// zero, |bn->neg| is set to zero.
void bn_set_minimal_width(BIGNUM *bn);
// bn_wexpand ensures that |bn| has at least |words| words of space without
// altering its value. It returns one on success or zero on allocation
// failure.
int bn_wexpand(BIGNUM *bn, size_t words);
// bn_expand acts the same as |bn_wexpand|, but takes a number of bits rather
// than a number of words.
int bn_expand(BIGNUM *bn, size_t bits);
// bn_resize_words adjusts |bn->top| to be |words|. It returns one on success
// and zero on allocation error or if |bn|'s value is too large.
OPENSSL_EXPORT int bn_resize_words(BIGNUM *bn, size_t words);
// bn_select_words sets |r| to |a| if |mask| is all ones or |b| if |mask| is
// all zeros.
void bn_select_words(BN_ULONG *r, BN_ULONG mask, const BN_ULONG *a,
const BN_ULONG *b, size_t num);
// bn_set_words sets |bn| to the value encoded in the |num| words in |words|,
// least significant word first.
int bn_set_words(BIGNUM *bn, const BN_ULONG *words, size_t num);
// bn_fits_in_words returns one if |bn| may be represented in |num| words, plus
// a sign bit, and zero otherwise.
int bn_fits_in_words(const BIGNUM *bn, size_t num);
// bn_copy_words copies the value of |bn| to |out| and returns one if the value
// is representable in |num| words. Otherwise, it returns zero.
int bn_copy_words(BN_ULONG *out, size_t num, const BIGNUM *bn);
// bn_mul_add_words multiplies |ap| by |w|, adds the result to |rp|, and places
// the result in |rp|. |ap| and |rp| must both be |num| words long. It returns
// the carry word of the operation. |ap| and |rp| may be equal but otherwise may
// not alias.
BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
BN_ULONG w);
// bn_mul_words multiplies |ap| by |w| and places the result in |rp|. |ap| and
// |rp| must both be |num| words long. It returns the carry word of the
// operation. |ap| and |rp| may be equal but otherwise may not alias.
BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num, BN_ULONG w);
// bn_sqr_words sets |rp[2*i]| and |rp[2*i+1]| to |ap[i]|'s square, for all |i|
// up to |num|. |ap| is an array of |num| words and |rp| an array of |2*num|
// words. |ap| and |rp| may not alias.
//
// This gives the contribution of the |ap[i]*ap[i]| terms when squaring |ap|.
void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num);
// bn_add_words adds |ap| to |bp| and places the result in |rp|, each of which
// are |num| words long. It returns the carry bit, which is one if the operation
// overflowed and zero otherwise. Any pair of |ap|, |bp|, and |rp| may be equal
// to each other but otherwise may not alias.
BN_ULONG bn_add_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
size_t num);
// bn_sub_words subtracts |bp| from |ap| and places the result in |rp|. It
// returns the borrow bit, which is one if the computation underflowed and zero
// otherwise. Any pair of |ap|, |bp|, and |rp| may be equal to each other but
// otherwise may not alias.
BN_ULONG bn_sub_words(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
size_t num);
// bn_mul_comba4 sets |r| to the product of |a| and |b|.
void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]);
// bn_mul_comba8 sets |r| to the product of |a| and |b|.
void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]);
// bn_sqr_comba8 sets |r| to |a|^2.
void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]);
// bn_sqr_comba4 sets |r| to |a|^2.
void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]);
// bn_less_than_words returns one if |a| < |b| and zero otherwise, where |a|
// and |b| both are |len| words long. It runs in constant time.
int bn_less_than_words(const BN_ULONG *a, const BN_ULONG *b, size_t len);
// bn_in_range_words returns one if |min_inclusive| <= |a| < |max_exclusive|,
// where |a| and |max_exclusive| both are |len| words long. |a| and
// |max_exclusive| are treated as secret.
int bn_in_range_words(const BN_ULONG *a, BN_ULONG min_inclusive,
const BN_ULONG *max_exclusive, size_t len);
// bn_rand_range_words sets |out| to a uniformly distributed random number from
// |min_inclusive| to |max_exclusive|. Both |out| and |max_exclusive| are |len|
// words long.
//
// This function runs in time independent of the result, but |min_inclusive| and
// |max_exclusive| are public data. (Information about the range is unavoidably
// leaked by how many iterations it took to select a number.)
int bn_rand_range_words(BN_ULONG *out, BN_ULONG min_inclusive,
const BN_ULONG *max_exclusive, size_t len,
const uint8_t additional_data[32]);
// bn_rand_secret_range behaves like |BN_rand_range_ex|, but treats
// |max_exclusive| as secret. Because of this constraint, the distribution of
// values returned is more complex.
//
// Rather than repeatedly generating values until one is in range, which would
// leak information, it generates one value. If the value is in range, it sets
// |*out_is_uniform| to one. Otherwise, it sets |*out_is_uniform| to zero,
// fixing up the value to force it in range.
//
// The subset of calls to |bn_rand_secret_range| which set |*out_is_uniform| to
// one are uniformly distributed in the target range. Calls overall are not.
// This function is intended for use in situations where the extra values are
// still usable and where the number of iterations needed to reach the target
// number of uniform outputs may be blinded for negligible probabilities of
// timing leaks.
//
// Although this function treats |max_exclusive| as secret, it treats the number
// of bits in |max_exclusive| as public.
int bn_rand_secret_range(BIGNUM *r, int *out_is_uniform, BN_ULONG min_inclusive,
const BIGNUM *max_exclusive);
#if !defined(OPENSSL_NO_ASM) && \
(defined(OPENSSL_X86) || defined(OPENSSL_X86_64) || \
defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64))
#define OPENSSL_BN_ASM_MONT
// bn_mul_mont writes |ap| * |bp| mod |np| to |rp|, each |num| words
// long. Inputs and outputs are in Montgomery form. |n0| is a pointer to the
// corresponding field in |BN_MONT_CTX|. It returns one if |bn_mul_mont| handles
// inputs of this size and zero otherwise.
//
// TODO(davidben): The x86_64 implementation expects a 32-bit input and masks
// off upper bits. The aarch64 implementation expects a 64-bit input and does
// not. |size_t| is the safer option but not strictly correct for x86_64. But
// this function implicitly already has a bound on the size of |num| because it
// internally creates |num|-sized stack allocation.
//
// See also discussion in |ToWord| in abi_test.h for notes on smaller-than-word
// inputs.
int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
const BN_ULONG *np, const BN_ULONG *n0, size_t num);
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
#define OPENSSL_BN_ASM_MONT5
// bn_mul_mont_gather5 loads index |power| of |table|, multiplies it
// by |ap| modulo |np|, and stores the result in |rp|. The values are |num|
// words long and represented in Montgomery form. |n0| is a pointer to the
// corresponding field in |BN_MONT_CTX|.
void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap,
const BN_ULONG *table, const BN_ULONG *np,
const BN_ULONG *n0, int num, int power);
// bn_scatter5 stores |inp| to index |power| of |table|. |inp| and each entry of
// |table| are |num| words long. |power| must be less than 32. |table| must be
// 32*|num| words long.
void bn_scatter5(const BN_ULONG *inp, size_t num, BN_ULONG *table,
size_t power);
// bn_gather5 loads index |power| of |table| and stores it in |out|. |out| and
// each entry of |table| are |num| words long. |power| must be less than 32.
void bn_gather5(BN_ULONG *out, size_t num, BN_ULONG *table, size_t power);
// bn_power5 squares |ap| five times and multiplies it by the value stored at
// index |power| of |table|, modulo |np|. It stores the result in |rp|. The
// values are |num| words long and represented in Montgomery form. |n0| is a
// pointer to the corresponding field in |BN_MONT_CTX|. |num| must be divisible
// by 8.
void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *table,
const BN_ULONG *np, const BN_ULONG *n0, int num, int power);
// bn_from_montgomery converts |ap| from Montgomery form modulo |np| and writes
// the result in |rp|, each of which is |num| words long. It returns one on
// success and zero if it cannot handle inputs of length |num|. |n0| is a
// pointer to the corresponding field in |BN_MONT_CTX|.
int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
const BN_ULONG *not_used, const BN_ULONG *np,
const BN_ULONG *n0, int num);
#endif // !OPENSSL_NO_ASM && OPENSSL_X86_64
uint64_t bn_mont_n0(const BIGNUM *n);
// bn_mod_exp_base_2_consttime calculates r = 2**p (mod n). |p| must be larger
// than log_2(n); i.e. 2**p must be larger than |n|. |n| must be positive and
// odd. |p| and the bit width of |n| are assumed public, but |n| is otherwise
// treated as secret.
int bn_mod_exp_base_2_consttime(BIGNUM *r, unsigned p, const BIGNUM *n,
BN_CTX *ctx);
#if defined(OPENSSL_X86_64) && defined(_MSC_VER)
#define BN_UMULT_LOHI(low, high, a, b) ((low) = _umul128((a), (b), &(high)))
#endif
#if !defined(BN_ULLONG) && !defined(BN_UMULT_LOHI)
#error "Either BN_ULLONG or BN_UMULT_LOHI must be defined on every platform."
#endif
// bn_jacobi returns the Jacobi symbol of |a| and |b| (which is -1, 0 or 1), or
// -2 on error.
int bn_jacobi(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
// bn_is_bit_set_words returns one if bit |bit| is set in |a| and zero
// otherwise.
int bn_is_bit_set_words(const BN_ULONG *a, size_t num, unsigned bit);
// bn_one_to_montgomery sets |r| to one in Montgomery form. It returns one on
// success and zero on error. This function treats the bit width of the modulus
// as public.
int bn_one_to_montgomery(BIGNUM *r, const BN_MONT_CTX *mont, BN_CTX *ctx);
// bn_less_than_montgomery_R returns one if |bn| is less than the Montgomery R
// value for |mont| and zero otherwise.
int bn_less_than_montgomery_R(const BIGNUM *bn, const BN_MONT_CTX *mont);
// bn_mod_u16_consttime returns |bn| mod |d|, ignoring |bn|'s sign bit. It runs
// in time independent of the value of |bn|, but it treats |d| as public.
OPENSSL_EXPORT uint16_t bn_mod_u16_consttime(const BIGNUM *bn, uint16_t d);
// bn_odd_number_is_obviously_composite returns one if |bn| is divisible by one
// of the first several odd primes and zero otherwise.
int bn_odd_number_is_obviously_composite(const BIGNUM *bn);
// A BN_MILLER_RABIN stores state common to each Miller-Rabin iteration. It is
// initialized within an existing |BN_CTX| scope and may not be used after
// that scope is released with |BN_CTX_end|. Field names match those in FIPS
// 186-4, section C.3.1.
typedef struct {
// w1 is w-1.
BIGNUM *w1;
// m is (w-1)/2^a.
BIGNUM *m;
// one_mont is 1 (mod w) in Montgomery form.
BIGNUM *one_mont;
// w1_mont is w-1 (mod w) in Montgomery form.
BIGNUM *w1_mont;
// w_bits is BN_num_bits(w).
int w_bits;
// a is the largest integer such that 2^a divides w-1.
int a;
} BN_MILLER_RABIN;
// bn_miller_rabin_init initializes |miller_rabin| for testing if |mont->N| is
// prime. It returns one on success and zero on error.
OPENSSL_EXPORT int bn_miller_rabin_init(BN_MILLER_RABIN *miller_rabin,
const BN_MONT_CTX *mont, BN_CTX *ctx);
// bn_miller_rabin_iteration performs one Miller-Rabin iteration, checking if
// |b| is a composite witness for |mont->N|. |miller_rabin| must have been
// initialized with |bn_miller_rabin_init|. On success, it returns one and sets
// |*out_is_possibly_prime| to one if |mont->N| may still be prime or zero if
// |b| shows it is composite. On allocation or internal failure, it returns
// zero.
OPENSSL_EXPORT int bn_miller_rabin_iteration(
const BN_MILLER_RABIN *miller_rabin, int *out_is_possibly_prime,
const BIGNUM *b, const BN_MONT_CTX *mont, BN_CTX *ctx);
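// Illustrative usage sketch (comment only, not part of the upstream header):
// a caller testing the modulus wrapped by |mont| would drive the two functions
// above roughly as follows, drawing a fresh random base |b| from [2, w-2] on
// each pass; |iterations| and the error paths are the caller's concern.
//
//   BN_MILLER_RABIN miller_rabin;
//   if (!bn_miller_rabin_init(&miller_rabin, mont, ctx)) {
//     goto err;
//   }
//   for (int i = 0; i < iterations; i++) {
//     int possibly_prime;
//     // ... pick a random |b| here ...
//     if (!bn_miller_rabin_iteration(&miller_rabin, &possibly_prime, b, mont,
//                                    ctx)) {
//       goto err;
//     }
//     if (!possibly_prime) {
//       break;  // |mont->N| is composite.
//     }
//   }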
// bn_rshift1_words sets |r| to |a| >> 1, where both arrays are |num| words wide.
void bn_rshift1_words(BN_ULONG *r, const BN_ULONG *a, size_t num);
// bn_rshift_words sets |r| to |a| >> |shift|, where both arrays are |num| words
// wide.
void bn_rshift_words(BN_ULONG *r, const BN_ULONG *a, unsigned shift,
size_t num);
// bn_rshift_secret_shift behaves like |BN_rshift| but runs in time independent
// of both |a| and |n|.
OPENSSL_EXPORT int bn_rshift_secret_shift(BIGNUM *r, const BIGNUM *a,
unsigned n, BN_CTX *ctx);
// bn_reduce_once sets |r| to |a| mod |m| where 0 <= |a| < 2*|m|. It returns
// zero if |a| < |m| and a mask of all ones if |a| >= |m|. Each array is |num|
// words long, but |a| has an additional word specified by |carry|. |carry| must
// be zero or one, as implied by the bounds on |a|.
//
// |r|, |a|, and |m| may not alias. Use |bn_reduce_once_in_place| if |r| and |a|
// must alias.
BN_ULONG bn_reduce_once(BN_ULONG *r, const BN_ULONG *a, BN_ULONG carry,
const BN_ULONG *m, size_t num);
// bn_reduce_once_in_place behaves like |bn_reduce_once| but acts in-place on
// |r|, using |tmp| as scratch space. |r|, |tmp|, and |m| may not alias.
BN_ULONG bn_reduce_once_in_place(BN_ULONG *r, BN_ULONG carry, const BN_ULONG *m,
BN_ULONG *tmp, size_t num);
// Constant-time non-modular arithmetic.
//
// The following functions implement non-modular arithmetic in constant-time
// and pessimally set |r->width| to the largest possible word size.
//
// Note this means that, e.g., repeatedly multiplying by one will cause widths
// to increase without bound. The corresponding public API functions minimize
// their outputs to avoid regressing calculator consumers.
// bn_uadd_consttime behaves like |BN_uadd|, but it pessimally sets
// |r->width| = |a->width| + |b->width| + 1.
int bn_uadd_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
// bn_usub_consttime behaves like |BN_usub|, but it pessimally sets
// |r->width| = |a->width|.
int bn_usub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b);
// bn_abs_sub_consttime sets |r| to the absolute value of |a| - |b|, treating
// both inputs as secret. It returns one on success and zero on error.
OPENSSL_EXPORT int bn_abs_sub_consttime(BIGNUM *r, const BIGNUM *a,
const BIGNUM *b, BN_CTX *ctx);
// bn_mul_consttime behaves like |BN_mul|, but it rejects negative inputs and
// pessimally sets |r->width| to |a->width| + |b->width|, to avoid leaking
// information about |a| and |b|.
int bn_mul_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx);
// bn_sqr_consttime behaves like |BN_sqr|, but it pessimally sets |r->width|
// to 2*|a->width|, to avoid leaking information about |a|.
int bn_sqr_consttime(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx);
// bn_div_consttime behaves like |BN_div|, but it rejects negative inputs and
// treats both inputs, including their magnitudes, as secret. It is, as a
// result, much slower than |BN_div| and should only be used for rare operations
// where Montgomery reduction is not available.
//
// Note that |quotient->width| will be set pessimally to |numerator->width|.
OPENSSL_EXPORT int bn_div_consttime(BIGNUM *quotient, BIGNUM *remainder,
const BIGNUM *numerator,
const BIGNUM *divisor, BN_CTX *ctx);
// bn_is_relatively_prime checks whether GCD(|x|, |y|) is one. On success, it
// returns one and sets |*out_relatively_prime| to one if the GCD was one and
// zero otherwise. On error, it returns zero.
OPENSSL_EXPORT int bn_is_relatively_prime(int *out_relatively_prime,
const BIGNUM *x, const BIGNUM *y,
BN_CTX *ctx);
// bn_lcm_consttime sets |r| to LCM(|a|, |b|). It returns one on success and
// zero on error. |a| and |b| are both treated as secret.
OPENSSL_EXPORT int bn_lcm_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
BN_CTX *ctx);
// Constant-time modular arithmetic.
//
// The following functions implement basic constant-time modular arithmetic.
// bn_mod_add_words sets |r| to |a| + |b| (mod |m|), using |tmp| as scratch
// space. Each array is |num| words long. |a| and |b| must be < |m|. Any pair of
// |r|, |a|, and |b| may alias.
void bn_mod_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
const BN_ULONG *m, BN_ULONG *tmp, size_t num);
// bn_mod_add_consttime acts like |BN_mod_add_quick| but takes a |BN_CTX|.
int bn_mod_add_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BIGNUM *m, BN_CTX *ctx);
// bn_mod_sub_words sets |r| to |a| - |b| (mod |m|), using |tmp| as scratch
// space. Each array is |num| words long. |a| and |b| must be < |m|. Any pair of
// |r|, |a|, and |b| may alias.
void bn_mod_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
const BN_ULONG *m, BN_ULONG *tmp, size_t num);
// bn_mod_sub_consttime acts like |BN_mod_sub_quick| but takes a |BN_CTX|.
int bn_mod_sub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BIGNUM *m, BN_CTX *ctx);
// bn_mod_lshift1_consttime acts like |BN_mod_lshift1_quick| but takes a
// |BN_CTX|.
int bn_mod_lshift1_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *m,
BN_CTX *ctx);
// bn_mod_lshift_consttime acts like |BN_mod_lshift_quick| but takes a |BN_CTX|.
int bn_mod_lshift_consttime(BIGNUM *r, const BIGNUM *a, int n, const BIGNUM *m,
BN_CTX *ctx);
// bn_mod_inverse_consttime sets |r| to |a|^-1, mod |n|. |a| must be non-
// negative and less than |n|. It returns one on success and zero on error. On
// failure, if the failure was caused by |a| having no inverse mod |n| then
// |*out_no_inverse| will be set to one; otherwise it will be set to zero.
//
// This function treats both |a| and |n| as secret, provided they are both non-
// zero and the inverse exists. It should only be used for even moduli where
// none of the less general implementations are applicable.
OPENSSL_EXPORT int bn_mod_inverse_consttime(BIGNUM *r, int *out_no_inverse,
const BIGNUM *a, const BIGNUM *n,
BN_CTX *ctx);
// bn_mod_inverse_prime sets |out| to the modular inverse of |a| modulo |p|,
// computed with Fermat's Little Theorem. It returns one on success and zero on
// error. If |mont_p| is NULL, one will be computed temporarily.
int bn_mod_inverse_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
BN_CTX *ctx, const BN_MONT_CTX *mont_p);
// bn_mod_inverse_secret_prime behaves like |bn_mod_inverse_prime| but uses
// |BN_mod_exp_mont_consttime| instead of |BN_mod_exp_mont| in hopes of
// protecting the exponent.
int bn_mod_inverse_secret_prime(BIGNUM *out, const BIGNUM *a, const BIGNUM *p,
BN_CTX *ctx, const BN_MONT_CTX *mont_p);
// Low-level operations for small numbers.
//
// The following functions implement algorithms suitable for use with scalars
// and field elements in elliptic curves. They rely on the number being small
// both to stack-allocate various temporaries and because they do not implement
// optimizations useful for the larger values used in RSA.
// BN_SMALL_MAX_WORDS is the largest size input these functions handle. This
// limit allows temporaries to be more easily stack-allocated. This limit is set
// to accommodate P-521.
#if defined(OPENSSL_32_BIT)
#define BN_SMALL_MAX_WORDS 17
#else
#define BN_SMALL_MAX_WORDS 9
#endif
// bn_mul_small sets |r| to |a|*|b|. |num_r| must be |num_a| + |num_b|. |r| may
// not alias with |a| or |b|.
void bn_mul_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a,
const BN_ULONG *b, size_t num_b);
// bn_sqr_small sets |r| to |a|^2. |num_a| must be at most |BN_SMALL_MAX_WORDS|.
// |num_r| must be |num_a|*2. |r| and |a| may not alias.
void bn_sqr_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a);
// In the following functions, the modulus must be at most |BN_SMALL_MAX_WORDS|
// words long.
// bn_to_montgomery_small sets |r| to |a| translated to the Montgomery domain.
// |r| and |a| are |num| words long, which must be |mont->N.width|. |a| must be
// fully reduced and may alias |r|.
void bn_to_montgomery_small(BN_ULONG *r, const BN_ULONG *a, size_t num,
const BN_MONT_CTX *mont);
// bn_from_montgomery_small sets |r| to |a| translated out of the Montgomery
// domain. |r| and |a| are |num_r| and |num_a| words long, respectively. |num_r|
// must be |mont->N.width|. |a| must be at most |mont->N|^2 and may alias |r|.
//
// Unlike most of these functions, only |num_r| is bounded by
// |BN_SMALL_MAX_WORDS|. |num_a| may exceed it, but must be at most 2 * |num_r|.
void bn_from_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
size_t num_a, const BN_MONT_CTX *mont);
// bn_mod_mul_montgomery_small sets |r| to |a| * |b| mod |mont->N|. Both inputs
// and outputs are in the Montgomery domain. Each array is |num| words long,
// which must be |mont->N.width|. Any two of |r|, |a|, and |b| may alias. |a|
// and |b| must be reduced on input.
void bn_mod_mul_montgomery_small(BN_ULONG *r, const BN_ULONG *a,
const BN_ULONG *b, size_t num,
const BN_MONT_CTX *mont);
// bn_mod_exp_mont_small sets |r| to |a|^|p| mod |mont->N|. It returns one on
// success and zero on programmer or internal error. Both inputs and outputs are
// in the Montgomery domain. |r| and |a| are |num| words long, which must be
// |mont->N.width| and at most |BN_SMALL_MAX_WORDS|. |a| must be fully-reduced.
// This function runs in time independent of |a|, but |p| and |mont->N| are
// public values. |a| must be fully-reduced and may alias with |r|.
//
// Note this function differs from |BN_mod_exp_mont| which uses Montgomery
// reduction but takes input and output outside the Montgomery domain. Combine
// this function with |bn_from_montgomery_small| and |bn_to_montgomery_small|
// if necessary.
void bn_mod_exp_mont_small(BN_ULONG *r, const BN_ULONG *a, size_t num,
const BN_ULONG *p, size_t num_p,
const BN_MONT_CTX *mont);
// bn_mod_inverse0_prime_mont_small sets |r| to |a|^-1 mod |mont->N|. If |a| is
// zero, |r| is set to zero. |mont->N| must be a prime. |r| and |a| are |num|
// words long, which must be |mont->N.width| and at most |BN_SMALL_MAX_WORDS|.
// |a| must be fully-reduced and may alias |r|. This function runs in time
// independent of |a|, but |mont->N| is a public value.
void bn_mod_inverse0_prime_mont_small(BN_ULONG *r, const BN_ULONG *a,
size_t num, const BN_MONT_CTX *mont);
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_BN_INTERNAL_H

View File

@ -0,0 +1,146 @@
/* ====================================================================
* Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_bn.h>
#include <CBigNumBoringSSL_err.h>
#include "internal.h"
// least significant word
#define BN_lsw(n) (((n)->width == 0) ? (BN_ULONG) 0 : (n)->d[0])
int bn_jacobi(const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
// In 'tab', only odd-indexed entries are relevant:
// For any odd BIGNUM n,
// tab[BN_lsw(n) & 7]
// is $(-1)^{(n^2-1)/8}$ (using TeX notation).
// Note that the sign of n does not matter.
static const int tab[8] = {0, 1, 0, -1, 0, -1, 0, 1};
// The Jacobi symbol is only defined for odd modulus.
if (!BN_is_odd(b)) {
OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
return -2;
}
// Require b be positive.
if (BN_is_negative(b)) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return -2;
}
int ret = -2;
BN_CTX_start(ctx);
BIGNUM *A = BN_CTX_get(ctx);
BIGNUM *B = BN_CTX_get(ctx);
if (B == NULL) {
goto end;
}
if (!BN_copy(A, a) ||
!BN_copy(B, b)) {
goto end;
}
// Adapted from logic to compute the Kronecker symbol, originally implemented
// according to Henri Cohen, "A Course in Computational Algebraic Number
// Theory" (algorithm 1.4.10).
ret = 1;
while (1) {
// Cohen's step 3:
// B is positive and odd
if (BN_is_zero(A)) {
ret = BN_is_one(B) ? ret : 0;
goto end;
}
// now A is non-zero
int i = 0;
while (!BN_is_bit_set(A, i)) {
i++;
}
if (!BN_rshift(A, A, i)) {
ret = -2;
goto end;
}
if (i & 1) {
// i is odd
// multiply 'ret' by $(-1)^{(B^2-1)/8}$
ret = ret * tab[BN_lsw(B) & 7];
}
// Cohen's step 4:
// multiply 'ret' by $(-1)^{(A-1)(B-1)/4}$
if ((A->neg ? ~BN_lsw(A) : BN_lsw(A)) & BN_lsw(B) & 2) {
ret = -ret;
}
// (A, B) := (B mod |A|, |A|)
if (!BN_nnmod(B, B, A, ctx)) {
ret = -2;
goto end;
}
BIGNUM *tmp = A;
A = B;
B = tmp;
tmp->neg = 0;
}
end:
BN_CTX_end(ctx);
return ret;
}
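// Illustrative sketch, not part of the upstream BoringSSL sources: the helper
// below (a hypothetical name, never called by the library) shows how a caller
// exercises |bn_jacobi| through the public BIGNUM API. For instance,
// 15 == 7 (mod 8) gives (2/15) == 1, while reciprocity gives (7/15) == -1.
static int bn_jacobi_example(void) {
  int ok = 0;
  BN_CTX *ctx = BN_CTX_new();
  BIGNUM *a = BN_new();
  BIGNUM *b = BN_new();
  if (ctx == NULL || a == NULL || b == NULL ||
      !BN_set_word(a, 2) ||
      !BN_set_word(b, 15)) {
    goto done;
  }
  ok = bn_jacobi(a, b, ctx) == 1;       // (2/15) == 1
  if (ok && BN_set_word(a, 7)) {
    ok = bn_jacobi(a, b, ctx) == -1;    // (7/15) == -1
  }
done:
  BN_free(a);
  BN_free(b);
  BN_CTX_free(ctx);
  return ok;
}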

View File

@ -0,0 +1,502 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/
/* ====================================================================
* Copyright (c) 1998-2006 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_thread.h>
#include <CBigNumBoringSSL_type_check.h>
#include "internal.h"
#include "../../internal.h"
BN_MONT_CTX *BN_MONT_CTX_new(void) {
BN_MONT_CTX *ret = OPENSSL_malloc(sizeof(BN_MONT_CTX));
if (ret == NULL) {
return NULL;
}
OPENSSL_memset(ret, 0, sizeof(BN_MONT_CTX));
BN_init(&ret->RR);
BN_init(&ret->N);
return ret;
}
void BN_MONT_CTX_free(BN_MONT_CTX *mont) {
if (mont == NULL) {
return;
}
BN_free(&mont->RR);
BN_free(&mont->N);
OPENSSL_free(mont);
}
BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, const BN_MONT_CTX *from) {
if (to == from) {
return to;
}
if (!BN_copy(&to->RR, &from->RR) ||
!BN_copy(&to->N, &from->N)) {
return NULL;
}
to->n0[0] = from->n0[0];
to->n0[1] = from->n0[1];
return to;
}
static int bn_mont_ctx_set_N_and_n0(BN_MONT_CTX *mont, const BIGNUM *mod) {
if (BN_is_zero(mod)) {
OPENSSL_PUT_ERROR(BN, BN_R_DIV_BY_ZERO);
return 0;
}
if (!BN_is_odd(mod)) {
OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
return 0;
}
if (BN_is_negative(mod)) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
// Save the modulus.
if (!BN_copy(&mont->N, mod)) {
OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
return 0;
}
// |mont->N| is always stored minimally. Computing RR efficiently leaks the
// size of the modulus. While the modulus may be private in RSA (one of the
// primes), their sizes are public, so this is fine.
bn_set_minimal_width(&mont->N);
// Find n0 such that n0 * N == -1 (mod r).
//
// Only certain BN_BITS2<=32 platforms actually make use of n0[1]. For the
// others, we could use a shorter R value and use faster |BN_ULONG|-based
// math instead of |uint64_t|-based math, which would be double-precision.
// However, currently only the assembler files know which is which.
OPENSSL_STATIC_ASSERT(BN_MONT_CTX_N0_LIMBS == 1 || BN_MONT_CTX_N0_LIMBS == 2,
"BN_MONT_CTX_N0_LIMBS value is invalid");
OPENSSL_STATIC_ASSERT(
sizeof(BN_ULONG) * BN_MONT_CTX_N0_LIMBS == sizeof(uint64_t),
"uint64_t is insufficient precision for n0");
uint64_t n0 = bn_mont_n0(&mont->N);
mont->n0[0] = (BN_ULONG)n0;
#if BN_MONT_CTX_N0_LIMBS == 2
mont->n0[1] = (BN_ULONG)(n0 >> BN_BITS2);
#else
mont->n0[1] = 0;
#endif
return 1;
}
int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) {
if (!bn_mont_ctx_set_N_and_n0(mont, mod)) {
return 0;
}
BN_CTX *new_ctx = NULL;
if (ctx == NULL) {
new_ctx = BN_CTX_new();
if (new_ctx == NULL) {
return 0;
}
ctx = new_ctx;
}
// Save RR = R**2 (mod N). R is the smallest power of 2**BN_BITS2 such that R
// > mod. Even though the assembly on some 32-bit platforms works with 64-bit
// values, using |BN_BITS2| here, rather than |BN_MONT_CTX_N0_LIMBS *
// BN_BITS2|, is correct because R**2 will still be a multiple of the latter
// as |BN_MONT_CTX_N0_LIMBS| is either one or two.
unsigned lgBigR = mont->N.width * BN_BITS2;
BN_zero(&mont->RR);
int ok = BN_set_bit(&mont->RR, lgBigR * 2) &&
BN_mod(&mont->RR, &mont->RR, &mont->N, ctx) &&
bn_resize_words(&mont->RR, mont->N.width);
BN_CTX_free(new_ctx);
return ok;
}
BN_MONT_CTX *BN_MONT_CTX_new_for_modulus(const BIGNUM *mod, BN_CTX *ctx) {
BN_MONT_CTX *mont = BN_MONT_CTX_new();
if (mont == NULL ||
!BN_MONT_CTX_set(mont, mod, ctx)) {
BN_MONT_CTX_free(mont);
return NULL;
}
return mont;
}
BN_MONT_CTX *BN_MONT_CTX_new_consttime(const BIGNUM *mod, BN_CTX *ctx) {
BN_MONT_CTX *mont = BN_MONT_CTX_new();
if (mont == NULL ||
!bn_mont_ctx_set_N_and_n0(mont, mod)) {
goto err;
}
unsigned lgBigR = mont->N.width * BN_BITS2;
if (!bn_mod_exp_base_2_consttime(&mont->RR, lgBigR * 2, &mont->N, ctx) ||
!bn_resize_words(&mont->RR, mont->N.width)) {
goto err;
}
return mont;
err:
BN_MONT_CTX_free(mont);
return NULL;
}
int BN_MONT_CTX_set_locked(BN_MONT_CTX **pmont, CRYPTO_MUTEX *lock,
const BIGNUM *mod, BN_CTX *bn_ctx) {
CRYPTO_MUTEX_lock_read(lock);
BN_MONT_CTX *ctx = *pmont;
CRYPTO_MUTEX_unlock_read(lock);
if (ctx) {
return 1;
}
CRYPTO_MUTEX_lock_write(lock);
if (*pmont == NULL) {
*pmont = BN_MONT_CTX_new_for_modulus(mod, bn_ctx);
}
const int ok = *pmont != NULL;
CRYPTO_MUTEX_unlock_write(lock);
return ok;
}
int BN_to_montgomery(BIGNUM *ret, const BIGNUM *a, const BN_MONT_CTX *mont,
BN_CTX *ctx) {
return BN_mod_mul_montgomery(ret, a, &mont->RR, mont, ctx);
}
static int bn_from_montgomery_in_place(BN_ULONG *r, size_t num_r, BN_ULONG *a,
size_t num_a, const BN_MONT_CTX *mont) {
const BN_ULONG *n = mont->N.d;
size_t num_n = mont->N.width;
if (num_r != num_n || num_a != 2 * num_n) {
OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
return 0;
}
// Add multiples of |n| to |r| until R = 2^(nl * BN_BITS2) divides it. On
// input, we had |r| < |n| * R, so now |r| < 2 * |n| * R. Note that |r|
// includes |carry| which is stored separately.
BN_ULONG n0 = mont->n0[0];
BN_ULONG carry = 0;
for (size_t i = 0; i < num_n; i++) {
BN_ULONG v = bn_mul_add_words(a + i, n, num_n, a[i] * n0);
v += carry + a[i + num_n];
carry |= (v != a[i + num_n]);
carry &= (v <= a[i + num_n]);
a[i + num_n] = v;
}
// Shift |num_n| words to divide by R. We have |a| < 2 * |n|. Note that |a|
// includes |carry| which is stored separately.
a += num_n;
// |a| thus requires at most one additional subtraction |n| to be reduced.
bn_reduce_once(r, a, carry, n, num_n);
return 1;
}
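// Worked example (illustrative comment, not from upstream): with hypothetical
// 8-bit words, take n = 239, R = 256 and n0 = -1/239 mod 256 = 241, and reduce
// the two-word value a = 100. The loop computes a[0] * n0 mod 256 =
// 100 * 241 mod 256 = 36 and adds 36 * n = 8604, giving 8704 = 34 * 256, so
// the low word becomes zero. Dropping that word leaves 34, and indeed
// 34 * 256 mod 239 = 100, i.e. the result is a * R^-1 mod n as required.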
static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r,
const BN_MONT_CTX *mont) {
if (r->neg) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
const BIGNUM *n = &mont->N;
if (n->width == 0) {
ret->width = 0;
return 1;
}
int max = 2 * n->width; // carry is stored separately
if (!bn_resize_words(r, max) ||
!bn_wexpand(ret, n->width)) {
return 0;
}
ret->width = n->width;
ret->neg = 0;
return bn_from_montgomery_in_place(ret->d, ret->width, r->d, r->width, mont);
}
int BN_from_montgomery(BIGNUM *r, const BIGNUM *a, const BN_MONT_CTX *mont,
BN_CTX *ctx) {
int ret = 0;
BIGNUM *t;
BN_CTX_start(ctx);
t = BN_CTX_get(ctx);
if (t == NULL ||
!BN_copy(t, a)) {
goto err;
}
ret = BN_from_montgomery_word(r, t, mont);
err:
BN_CTX_end(ctx);
return ret;
}
int bn_one_to_montgomery(BIGNUM *r, const BN_MONT_CTX *mont, BN_CTX *ctx) {
// If the high bit of |n| is set, R = 2^(width*BN_BITS2) < 2 * |n|, so we
// compute R - |n| rather than perform Montgomery reduction.
const BIGNUM *n = &mont->N;
if (n->width > 0 && (n->d[n->width - 1] >> (BN_BITS2 - 1)) != 0) {
if (!bn_wexpand(r, n->width)) {
return 0;
}
r->d[0] = 0 - n->d[0];
for (int i = 1; i < n->width; i++) {
r->d[i] = ~n->d[i];
}
r->width = n->width;
r->neg = 0;
return 1;
}
return BN_from_montgomery(r, &mont->RR, mont, ctx);
}
static int bn_mod_mul_montgomery_fallback(BIGNUM *r, const BIGNUM *a,
const BIGNUM *b,
const BN_MONT_CTX *mont,
BN_CTX *ctx) {
int ret = 0;
BN_CTX_start(ctx);
BIGNUM *tmp = BN_CTX_get(ctx);
if (tmp == NULL) {
goto err;
}
if (a == b) {
if (!bn_sqr_consttime(tmp, a, ctx)) {
goto err;
}
} else {
if (!bn_mul_consttime(tmp, a, b, ctx)) {
goto err;
}
}
// reduce from aRR to aR
if (!BN_from_montgomery_word(r, tmp, mont)) {
goto err;
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
int BN_mod_mul_montgomery(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
const BN_MONT_CTX *mont, BN_CTX *ctx) {
if (a->neg || b->neg) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
#if defined(OPENSSL_BN_ASM_MONT)
// |bn_mul_mont| requires at least 128 bits of limbs, at least for x86.
int num = mont->N.width;
if (num >= (128 / BN_BITS2) &&
a->width == num &&
b->width == num) {
if (!bn_wexpand(r, num)) {
return 0;
}
if (!bn_mul_mont(r->d, a->d, b->d, mont->N.d, mont->n0, num)) {
// The check above ensures this won't happen.
assert(0);
OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
return 0;
}
r->neg = 0;
r->width = num;
return 1;
}
#endif
return bn_mod_mul_montgomery_fallback(r, a, b, mont, ctx);
}
int bn_less_than_montgomery_R(const BIGNUM *bn, const BN_MONT_CTX *mont) {
return !BN_is_negative(bn) &&
bn_fits_in_words(bn, mont->N.width);
}
void bn_to_montgomery_small(BN_ULONG *r, const BN_ULONG *a, size_t num,
const BN_MONT_CTX *mont) {
bn_mod_mul_montgomery_small(r, a, mont->RR.d, num, mont);
}
void bn_from_montgomery_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
size_t num_a, const BN_MONT_CTX *mont) {
if (num_r != (size_t)mont->N.width || num_r > BN_SMALL_MAX_WORDS ||
num_a > 2 * num_r) {
abort();
}
BN_ULONG tmp[BN_SMALL_MAX_WORDS * 2] = {0};
OPENSSL_memcpy(tmp, a, num_a * sizeof(BN_ULONG));
if (!bn_from_montgomery_in_place(r, num_r, tmp, 2 * num_r, mont)) {
abort();
}
OPENSSL_cleanse(tmp, 2 * num_r * sizeof(BN_ULONG));
}
void bn_mod_mul_montgomery_small(BN_ULONG *r, const BN_ULONG *a,
const BN_ULONG *b, size_t num,
const BN_MONT_CTX *mont) {
if (num != (size_t)mont->N.width || num > BN_SMALL_MAX_WORDS) {
abort();
}
#if defined(OPENSSL_BN_ASM_MONT)
// |bn_mul_mont| requires at least 128 bits of limbs, at least for x86.
if (num >= (128 / BN_BITS2)) {
if (!bn_mul_mont(r, a, b, mont->N.d, mont->n0, num)) {
abort(); // The check above ensures this won't happen.
}
return;
}
#endif
// Compute the product.
BN_ULONG tmp[2 * BN_SMALL_MAX_WORDS];
if (a == b) {
bn_sqr_small(tmp, 2 * num, a, num);
} else {
bn_mul_small(tmp, 2 * num, a, num, b, num);
}
// Reduce.
if (!bn_from_montgomery_in_place(r, num, tmp, 2 * num, mont)) {
abort();
}
OPENSSL_cleanse(tmp, 2 * num * sizeof(BN_ULONG));
}
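// Illustrative sketch, not part of the upstream BoringSSL sources: the helper
// below (a hypothetical name, never called by the library) shows the usual
// Montgomery round trip with the functions defined above. It computes
// 7 * 11 mod 23 by converting both factors into the Montgomery domain,
// multiplying there, and converting the product back out.
static int bn_mod_mul_montgomery_example(void) {
  int ok = 0;
  BN_CTX *ctx = BN_CTX_new();
  BIGNUM *a = BN_new();
  BIGNUM *b = BN_new();
  BIGNUM *n = BN_new();
  BIGNUM *r = BN_new();
  BN_MONT_CTX *mont = NULL;
  if (ctx == NULL || a == NULL || b == NULL || n == NULL || r == NULL ||
      !BN_set_word(a, 7) ||
      !BN_set_word(b, 11) ||
      !BN_set_word(n, 23)) {
    goto done;
  }
  mont = BN_MONT_CTX_new_for_modulus(n, ctx);
  if (mont == NULL ||
      !BN_to_montgomery(a, a, mont, ctx) ||           // a -> a*R mod n
      !BN_to_montgomery(b, b, mont, ctx) ||           // b -> b*R mod n
      !BN_mod_mul_montgomery(r, a, b, mont, ctx) ||   // r == a*b*R mod n
      !BN_from_montgomery(r, r, mont, ctx)) {         // r -> a*b mod n
    goto done;
  }
  ok = BN_is_word(r, 8);  // 7 * 11 == 77 == 3 * 23 + 8
done:
  BN_free(a);
  BN_free(b);
  BN_free(n);
  BN_free(r);
  BN_MONT_CTX_free(mont);
  BN_CTX_free(ctx);
  return ok;
}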

View File

@ -0,0 +1,186 @@
/* Copyright 2016 Brian Smith.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include "internal.h"
#include "../../internal.h"
static uint64_t bn_neg_inv_mod_r_u64(uint64_t n);
OPENSSL_STATIC_ASSERT(BN_MONT_CTX_N0_LIMBS == 1 || BN_MONT_CTX_N0_LIMBS == 2,
"BN_MONT_CTX_N0_LIMBS value is invalid");
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) * BN_MONT_CTX_N0_LIMBS ==
sizeof(uint64_t),
"uint64_t is insufficient precision for n0");
// LG_LITTLE_R is log_2(r).
#define LG_LITTLE_R (BN_MONT_CTX_N0_LIMBS * BN_BITS2)
uint64_t bn_mont_n0(const BIGNUM *n) {
// These conditions are checked by the caller, |BN_MONT_CTX_set| or
// |BN_MONT_CTX_new_consttime|.
assert(!BN_is_zero(n));
assert(!BN_is_negative(n));
assert(BN_is_odd(n));
// r == 2**(BN_MONT_CTX_N0_LIMBS * BN_BITS2) and LG_LITTLE_R == lg(r). This
// ensures that we can do integer division by |r| by simply ignoring
// |BN_MONT_CTX_N0_LIMBS| limbs. Similarly, we can calculate values modulo
// |r| by just looking at the lowest |BN_MONT_CTX_N0_LIMBS| limbs. This is
// what makes Montgomery multiplication efficient.
//
// As shown in Algorithm 1 of "Fast Prime Field Elliptic Curve Cryptography
// with 256 Bit Primes" by Shay Gueron and Vlad Krasnov, in the loop of a
// multi-limb Montgomery multiplication of |a * b (mod n)|, given the
// unreduced product |t == a * b|, we repeatedly calculate:
//
// t1 := t % r |t1| is |t|'s lowest limb (see previous paragraph).
// t2 := t1*n0*n
// t3 := t + t2
// t := t3 / r copy all limbs of |t3| except the lowest to |t|.
//
// In the last step, it would only make sense to ignore the lowest limb of
// |t3| if it were zero. The middle steps ensure that this is the case:
//
// t3 == 0 (mod r)
// t + t2 == 0 (mod r)
// t + t1*n0*n == 0 (mod r)
// t1*n0*n == -t (mod r)
// t*n0*n == -t (mod r)
// n0*n == -1 (mod r)
// n0 == -1/n (mod r)
//
// Thus, in each iteration of the loop, we multiply by the constant factor
// |n0|, the negative inverse of n (mod r).
// n_mod_r = n % r. As explained above, this is done by taking the lowest
// |BN_MONT_CTX_N0_LIMBS| limbs of |n|.
uint64_t n_mod_r = n->d[0];
#if BN_MONT_CTX_N0_LIMBS == 2
if (n->width > 1) {
n_mod_r |= (uint64_t)n->d[1] << BN_BITS2;
}
#endif
return bn_neg_inv_mod_r_u64(n_mod_r);
}
// bn_neg_inv_mod_r_u64 calculates -1/n mod r; i.e. it calculates |v|
// such that u*r - v*n == 1. |r| is the constant defined in |bn_mont_n0|. |n|
// must be odd.
//
// This is derived from |xbinGCD| in Henry S. Warren, Jr.'s "Montgomery
// Multiplication" (http://www.hackersdelight.org/MontgomeryMultiplication.pdf).
// It is very similar to the MODULAR-INVERSE function in Stephen R. Dussé's and
// Burton S. Kaliski Jr.'s "A Cryptographic Library for the Motorola DSP56000"
// (http://link.springer.com/chapter/10.1007%2F3-540-46877-3_21).
//
// This is inspired by Joppe W. Bos's "Constant Time Modular Inversion"
// (http://www.joppebos.com/files/CTInversion.pdf) so that the inversion is
// constant-time with respect to |n|. We assume uint64_t additions,
// subtractions, shifts, and bitwise operations are all constant time, which
// may be a large leap of faith on 32-bit targets. We avoid division and
// multiplication, which tend to be the most problematic in terms of timing
// leaks.
//
// Most GCD implementations return values such that |u*r + v*n == 1|, so the
// caller would have to negate the resultant |v| for the purpose of Montgomery
// multiplication. This implementation does the negation implicitly by doing
// the computations as a difference instead of a sum.
static uint64_t bn_neg_inv_mod_r_u64(uint64_t n) {
assert(n % 2 == 1);
// alpha == 2**(lg r - 1) == r / 2.
static const uint64_t alpha = UINT64_C(1) << (LG_LITTLE_R - 1);
const uint64_t beta = n;
uint64_t u = 1;
uint64_t v = 0;
// The invariant maintained from here on is:
// 2**(lg r - i) == u*2*alpha - v*beta.
for (size_t i = 0; i < LG_LITTLE_R; ++i) {
#if BN_BITS2 == 64 && defined(BN_ULLONG)
assert((BN_ULLONG)(1) << (LG_LITTLE_R - i) ==
((BN_ULLONG)u * 2 * alpha) - ((BN_ULLONG)v * beta));
#endif
// Delete a common factor of 2 in u and v if |u| is even. Otherwise, set
// |u = (u + beta) / 2| and |v = (v / 2) + alpha|.
uint64_t u_is_odd = UINT64_C(0) - (u & 1); // Either 0xff..ff or 0.
// The addition can overflow, so use Dietz's method for it.
//
// Dietz calculates (x+y)/2 by (x⊕y)>>1 + x&y. This is valid for all
// (unsigned) x and y, even when x+y overflows. Evidence for 32-bit values
// (embedded in 64 bits so that overflow can be ignored):
//
// (declare-fun x () (_ BitVec 64))
// (declare-fun y () (_ BitVec 64))
// (assert (let (
// (one (_ bv1 64))
// (thirtyTwo (_ bv32 64)))
// (and
// (bvult x (bvshl one thirtyTwo))
// (bvult y (bvshl one thirtyTwo))
// (not (=
// (bvadd (bvlshr (bvxor x y) one) (bvand x y))
// (bvlshr (bvadd x y) one)))
// )))
// (check-sat)
uint64_t beta_if_u_is_odd = beta & u_is_odd; // Either |beta| or 0.
u = ((u ^ beta_if_u_is_odd) >> 1) + (u & beta_if_u_is_odd);
uint64_t alpha_if_u_is_odd = alpha & u_is_odd; // Either |alpha| or 0.
v = (v >> 1) + alpha_if_u_is_odd;
}
// The invariant now shows that u*r - v*n == 1 since r == 2 * alpha.
#if BN_BITS2 == 64 && defined(BN_ULLONG)
assert(1 == ((BN_ULLONG)u * 2 * alpha) - ((BN_ULLONG)v * beta));
#endif
return v;
}
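// Worked example (illustrative comment, not from upstream): for n = 7 the loop
// above yields v = 0x9249249249249249. With r = 2**64 this satisfies
// u*r - v*n == 1 for u = 4, since 7 * 0x9249249249249249 == 4 * 2**64 - 1, so
// v is indeed -1/7 (mod r).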
int bn_mod_exp_base_2_consttime(BIGNUM *r, unsigned p, const BIGNUM *n,
BN_CTX *ctx) {
assert(!BN_is_zero(n));
assert(!BN_is_negative(n));
assert(BN_is_odd(n));
BN_zero(r);
unsigned n_bits = BN_num_bits(n);
assert(n_bits != 0);
assert(p > n_bits);
if (n_bits == 1) {
return 1;
}
// Set |r| to the largest power of two smaller than |n|, then shift with
// reductions the rest of the way.
if (!BN_set_bit(r, n_bits - 1) ||
!bn_mod_lshift_consttime(r, r, p - (n_bits - 1), n, ctx)) {
return 0;
}
return 1;
}

View File

@ -0,0 +1,749 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_type_check.h>
#include "internal.h"
#include "../../internal.h"
#define BN_MUL_RECURSIVE_SIZE_NORMAL 16
#define BN_SQR_RECURSIVE_SIZE_NORMAL BN_MUL_RECURSIVE_SIZE_NORMAL
static void bn_abs_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
size_t num, BN_ULONG *tmp) {
BN_ULONG borrow = bn_sub_words(tmp, a, b, num);
bn_sub_words(r, b, a, num);
bn_select_words(r, 0 - borrow, r /* tmp < 0 */, tmp /* tmp >= 0 */, num);
}
static void bn_mul_normal(BN_ULONG *r, const BN_ULONG *a, size_t na,
const BN_ULONG *b, size_t nb) {
if (na < nb) {
size_t itmp = na;
na = nb;
nb = itmp;
const BN_ULONG *ltmp = a;
a = b;
b = ltmp;
}
BN_ULONG *rr = &(r[na]);
if (nb == 0) {
OPENSSL_memset(r, 0, na * sizeof(BN_ULONG));
return;
}
rr[0] = bn_mul_words(r, a, na, b[0]);
for (;;) {
if (--nb == 0) {
return;
}
rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
if (--nb == 0) {
return;
}
rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
if (--nb == 0) {
return;
}
rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
if (--nb == 0) {
return;
}
rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
rr += 4;
r += 4;
b += 4;
}
}
// bn_sub_part_words sets |r| to |a| - |b|. It returns the borrow bit, which is
// one if the operation underflowed and zero otherwise. |cl| is the common
// length, that is, the shorter of len(a) or len(b). |dl| is the delta length,
// that is, len(a) - len(b). |r|'s length matches the larger of |a| and |b|, or
// cl + abs(dl).
//
// TODO(davidben): Make this take |size_t|. The |cl| + |dl| calling convention
// is confusing.
static BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a,
const BN_ULONG *b, int cl, int dl) {
assert(cl >= 0);
BN_ULONG borrow = bn_sub_words(r, a, b, cl);
if (dl == 0) {
return borrow;
}
r += cl;
a += cl;
b += cl;
if (dl < 0) {
// |a| is shorter than |b|. Complete the subtraction as if the excess words
// in |a| were zeros.
dl = -dl;
for (int i = 0; i < dl; i++) {
r[i] = 0u - b[i] - borrow;
borrow |= r[i] != 0;
}
} else {
// |b| is shorter than |a|. Complete the subtraction as if the excess words
// in |b| were zeros.
for (int i = 0; i < dl; i++) {
// |r| and |a| may alias, so use a temporary.
BN_ULONG tmp = a[i];
r[i] = a[i] - borrow;
borrow = tmp < r[i];
}
}
return borrow;
}
// bn_abs_sub_part_words computes |r| = |a| - |b|, storing the absolute value
// and returning a mask of all ones if the result was negative and all zeros if
// the result was positive. |cl| and |dl| follow the |bn_sub_part_words| calling
// convention.
//
// TODO(davidben): Make this take |size_t|. The |cl| + |dl| calling convention
// is confusing.
static BN_ULONG bn_abs_sub_part_words(BN_ULONG *r, const BN_ULONG *a,
const BN_ULONG *b, int cl, int dl,
BN_ULONG *tmp) {
BN_ULONG borrow = bn_sub_part_words(tmp, a, b, cl, dl);
bn_sub_part_words(r, b, a, cl, -dl);
int r_len = cl + (dl < 0 ? -dl : dl);
borrow = 0 - borrow;
bn_select_words(r, borrow, r /* tmp < 0 */, tmp /* tmp >= 0 */, r_len);
return borrow;
}
int bn_abs_sub_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
BN_CTX *ctx) {
int cl = a->width < b->width ? a->width : b->width;
int dl = a->width - b->width;
int r_len = a->width < b->width ? b->width : a->width;
BN_CTX_start(ctx);
BIGNUM *tmp = BN_CTX_get(ctx);
int ok = tmp != NULL &&
bn_wexpand(r, r_len) &&
bn_wexpand(tmp, r_len);
if (ok) {
bn_abs_sub_part_words(r->d, a->d, b->d, cl, dl, tmp->d);
r->width = r_len;
}
BN_CTX_end(ctx);
return ok;
}
// Karatsuba recursive multiplication algorithm
// (cf. Knuth, The Art of Computer Programming, Vol. 2)
// bn_mul_recursive sets |r| to |a| * |b|, using |t| as scratch space. |r| has
// length 2*|n2|, |a| has length |n2| + |dna|, |b| has length |n2| + |dnb|, and
// |t| has length 4*|n2|. |n2| must be a power of two. Finally, we must have
// -|BN_MUL_RECURSIVE_SIZE_NORMAL|/2 <= |dna| <= 0 and
// -|BN_MUL_RECURSIVE_SIZE_NORMAL|/2 <= |dnb| <= 0.
//
// TODO(davidben): Simplify and |size_t| the calling convention around lengths
// here.
static void bn_mul_recursive(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
int n2, int dna, int dnb, BN_ULONG *t) {
// |n2| is a power of two.
assert(n2 != 0 && (n2 & (n2 - 1)) == 0);
// Check |dna| and |dnb| are in range.
assert(-BN_MUL_RECURSIVE_SIZE_NORMAL/2 <= dna && dna <= 0);
assert(-BN_MUL_RECURSIVE_SIZE_NORMAL/2 <= dnb && dnb <= 0);
// Only call bn_mul_comba 8 if n2 == 8 and the
// two arrays are complete [steve]
if (n2 == 8 && dna == 0 && dnb == 0) {
bn_mul_comba8(r, a, b);
return;
}
// Else do normal multiply
if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
if (dna + dnb < 0) {
OPENSSL_memset(&r[2 * n2 + dna + dnb], 0,
sizeof(BN_ULONG) * -(dna + dnb));
}
return;
}
// Split |a| and |b| into a0,a1 and b0,b1, where a0 and b0 have size |n|.
// Split |t| into t0,t1,t2,t3, each of size |n|, with the remaining 4*|n| used
// for recursive calls.
// Split |r| into r0,r1,r2,r3. We must contribute a0*b0 to r0,r1, a0*b1+a1*b0
// to r1,r2, and a1*b1 to r2,r3. The middle term we will compute as:
//
//   a0*b1 + a1*b0 = (a0 - a1)*(b1 - b0) + a1*b1 + a0*b0
//
// Note that we know |n| >= |BN_MUL_RECURSIVE_SIZE_NORMAL|/2 above, so
// |tna| and |tnb| are non-negative.
int n = n2 / 2, tna = n + dna, tnb = n + dnb;
// t0 = a0 - a1 and t1 = b1 - b0. The result will be multiplied, so we XOR
// their sign masks, giving the sign of (a0 - a1)*(b1 - b0). t0 and t1
// themselves store the absolute value.
BN_ULONG neg = bn_abs_sub_part_words(t, a, &a[n], tna, n - tna, &t[n2]);
neg ^= bn_abs_sub_part_words(&t[n], &b[n], b, tnb, tnb - n, &t[n2]);
// Compute:
// t2,t3 = t0 * t1 = |(a0 - a1)*(b1 - b0)|
// r0,r1 = a0 * b0
// r2,r3 = a1 * b1
if (n == 4 && dna == 0 && dnb == 0) {
bn_mul_comba4(&t[n2], t, &t[n]);
bn_mul_comba4(r, a, b);
bn_mul_comba4(&r[n2], &a[n], &b[n]);
} else if (n == 8 && dna == 0 && dnb == 0) {
bn_mul_comba8(&t[n2], t, &t[n]);
bn_mul_comba8(r, a, b);
bn_mul_comba8(&r[n2], &a[n], &b[n]);
} else {
BN_ULONG *p = &t[n2 * 2];
bn_mul_recursive(&t[n2], t, &t[n], n, 0, 0, p);
bn_mul_recursive(r, a, b, n, 0, 0, p);
bn_mul_recursive(&r[n2], &a[n], &b[n], n, dna, dnb, p);
}
// t0,t1,c = r0,r1 + r2,r3 = a0*b0 + a1*b1
BN_ULONG c = bn_add_words(t, r, &r[n2], n2);
// t2,t3,c = t0,t1,c + neg*t2,t3 = (a0 - a1)*(b1 - b0) + a1*b1 + a0*b0.
// The second term is stored as the absolute value, so we do this with a
// constant-time select.
BN_ULONG c_neg = c - bn_sub_words(&t[n2 * 2], t, &t[n2], n2);
BN_ULONG c_pos = c + bn_add_words(&t[n2], t, &t[n2], n2);
bn_select_words(&t[n2], neg, &t[n2 * 2], &t[n2], n2);
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
c = constant_time_select_w(neg, c_neg, c_pos);
// We now have our three components. Add them together.
// r1,r2,c = r1,r2 + t2,t3,c
c += bn_add_words(&r[n], &r[n], &t[n2], n2);
// Propagate the carry bit to the end.
for (int i = n + n2; i < n2 + n2; i++) {
BN_ULONG old = r[i];
r[i] = old + c;
c = r[i] < old;
}
// The product should fit without carries.
assert(c == 0);
}
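/*
 * The middle-term identity above can be checked in isolation. A minimal,
 * self-contained sketch with two 32-bit halves per operand (not part of
 * BoringSSL; assumes a compiler with unsigned __int128, e.g. GCC or Clang):
 *
 *   a = a1*2^32 + a0,  b = b1*2^32 + b0
 *   a*b = a1*b1*2^64 + (a0*b0 + a1*b1 + (a0 - a1)*(b1 - b0))*2^32 + a0*b0
 */
#include <assert.h>
#include <stdint.h>

static unsigned __int128 karatsuba64_sketch(uint64_t a, uint64_t b) {
  uint64_t a0 = (uint32_t)a, a1 = a >> 32;
  uint64_t b0 = (uint32_t)b, b1 = b >> 32;
  uint64_t lo = a0 * b0;  // a0*b0 fits in 64 bits
  uint64_t hi = a1 * b1;  // a1*b1 fits in 64 bits
  // The factor (a0 - a1)*(b1 - b0) may be negative, so compute it signed;
  // the full middle term a0*b1 + a1*b0 is always non-negative.
  __int128 mid = (__int128)((int64_t)a0 - (int64_t)a1) *
                 (__int128)((int64_t)b1 - (int64_t)b0);
  unsigned __int128 middle = (unsigned __int128)((__int128)lo + hi + mid);
  return ((unsigned __int128)hi << 64) + (middle << 32) + lo;
}

static void karatsuba64_sketch_check(void) {
  uint64_t a = 0x123456789abcdef0u, b = 0x0fedcba987654321u;
  assert(karatsuba64_sketch(a, b) == (unsigned __int128)a * b);
}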
// bn_mul_part_recursive sets |r| to |a| * |b|, using |t| as scratch space. |r|
// has length 4*|n|, |a| has length |n| + |tna|, |b| has length |n| + |tnb|, and
// |t| has length 8*|n|. |n| must be a power of two. Additionally, we must have
// 0 <= tna < n and 0 <= tnb < n, and |tna| and |tnb| must differ by at most
// one.
//
// TODO(davidben): Make this take |size_t| and perhaps the actual lengths of |a|
// and |b|.
static void bn_mul_part_recursive(BN_ULONG *r, const BN_ULONG *a,
const BN_ULONG *b, int n, int tna, int tnb,
BN_ULONG *t) {
// |n| is a power of two.
assert(n != 0 && (n & (n - 1)) == 0);
// Check |tna| and |tnb| are in range.
assert(0 <= tna && tna < n);
assert(0 <= tnb && tnb < n);
assert(-1 <= tna - tnb && tna - tnb <= 1);
int n2 = n * 2;
if (n < 8) {
bn_mul_normal(r, a, n + tna, b, n + tnb);
    OPENSSL_memset(r + n2 + tna + tnb, 0, sizeof(BN_ULONG) * (n2 - tna - tnb));
return;
}
// Split |a| and |b| into a0,a1 and b0,b1, where a0 and b0 have size |n|. |a1|
// and |b1| have size |tna| and |tnb|, respectively.
// Split |t| into t0,t1,t2,t3, each of size |n|, with the remaining 4*|n| used
// for recursive calls.
  // Split |r| into r0,r1,r2,r3. We must contribute a0*b0 to r0,r1, a0*b1+a1*b0
  // to r1,r2, and a1*b1 to r2,r3. The middle term we will compute as:
  //
  //   a0*b1 + a1*b0 = (a0 - a1)*(b1 - b0) + a1*b1 + a0*b0
// t0 = a0 - a1 and t1 = b1 - b0. The result will be multiplied, so we XOR
// their sign masks, giving the sign of (a0 - a1)*(b1 - b0). t0 and t1
// themselves store the absolute value.
BN_ULONG neg = bn_abs_sub_part_words(t, a, &a[n], tna, n - tna, &t[n2]);
neg ^= bn_abs_sub_part_words(&t[n], &b[n], b, tnb, tnb - n, &t[n2]);
// Compute:
// t2,t3 = t0 * t1 = |(a0 - a1)*(b1 - b0)|
// r0,r1 = a0 * b0
// r2,r3 = a1 * b1
if (n == 8) {
bn_mul_comba8(&t[n2], t, &t[n]);
bn_mul_comba8(r, a, b);
bn_mul_normal(&r[n2], &a[n], tna, &b[n], tnb);
    // |bn_mul_normal| only writes |tna| + |tnb| words. Zero the rest.
OPENSSL_memset(&r[n2 + tna + tnb], 0, sizeof(BN_ULONG) * (n2 - tna - tnb));
} else {
BN_ULONG *p = &t[n2 * 2];
bn_mul_recursive(&t[n2], t, &t[n], n, 0, 0, p);
bn_mul_recursive(r, a, b, n, 0, 0, p);
OPENSSL_memset(&r[n2], 0, sizeof(BN_ULONG) * n2);
if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
bn_mul_normal(&r[n2], &a[n], tna, &b[n], tnb);
} else {
int i = n;
for (;;) {
i /= 2;
if (i < tna || i < tnb) {
// E.g., n == 16, i == 8 and tna == 11. |tna| and |tnb| are within one
// of each other, so if |tna| is larger and tna > i, then we know
// tnb >= i, and this call is valid.
bn_mul_part_recursive(&r[n2], &a[n], &b[n], i, tna - i, tnb - i, p);
break;
}
if (i == tna || i == tnb) {
// If there is only a bottom half to the number, just do it. We know
// the larger of |tna - i| and |tnb - i| is zero. The other is zero or
          // -1 because |tna| and |tnb| differ by at most one.
bn_mul_recursive(&r[n2], &a[n], &b[n], i, tna - i, tnb - i, p);
break;
}
// This loop will eventually terminate when |i| falls below
// |BN_MUL_RECURSIVE_SIZE_NORMAL| because we know one of |tna| and |tnb|
// exceeds that.
}
}
}
// t0,t1,c = r0,r1 + r2,r3 = a0*b0 + a1*b1
BN_ULONG c = bn_add_words(t, r, &r[n2], n2);
// t2,t3,c = t0,t1,c + neg*t2,t3 = (a0 - a1)*(b1 - b0) + a1*b1 + a0*b0.
// The second term is stored as the absolute value, so we do this with a
// constant-time select.
BN_ULONG c_neg = c - bn_sub_words(&t[n2 * 2], t, &t[n2], n2);
BN_ULONG c_pos = c + bn_add_words(&t[n2], t, &t[n2], n2);
bn_select_words(&t[n2], neg, &t[n2 * 2], &t[n2], n2);
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
c = constant_time_select_w(neg, c_neg, c_pos);
// We now have our three components. Add them together.
// r1,r2,c = r1,r2 + t2,t3,c
c += bn_add_words(&r[n], &r[n], &t[n2], n2);
// Propagate the carry bit to the end.
for (int i = n + n2; i < n2 + n2; i++) {
BN_ULONG old = r[i];
r[i] = old + c;
c = r[i] < old;
}
// The product should fit without carries.
assert(c == 0);
}
// bn_mul_impl implements |BN_mul| and |bn_mul_consttime|. Note this function
// breaks |BIGNUM| invariants and may return a negative zero. This is handled by
// the callers.
static int bn_mul_impl(BIGNUM *r, const BIGNUM *a, const BIGNUM *b,
BN_CTX *ctx) {
int al = a->width;
int bl = b->width;
if (al == 0 || bl == 0) {
BN_zero(r);
return 1;
}
int ret = 0;
BIGNUM *rr;
BN_CTX_start(ctx);
if (r == a || r == b) {
rr = BN_CTX_get(ctx);
if (rr == NULL) {
goto err;
}
} else {
rr = r;
}
rr->neg = a->neg ^ b->neg;
int i = al - bl;
if (i == 0) {
if (al == 8) {
if (!bn_wexpand(rr, 16)) {
goto err;
}
rr->width = 16;
bn_mul_comba8(rr->d, a->d, b->d);
goto end;
}
}
int top = al + bl;
static const int kMulNormalSize = 16;
if (al >= kMulNormalSize && bl >= kMulNormalSize) {
if (-1 <= i && i <= 1) {
// Find the largest power of two less than or equal to the larger length.
int j;
if (i >= 0) {
j = BN_num_bits_word((BN_ULONG)al);
} else {
j = BN_num_bits_word((BN_ULONG)bl);
}
j = 1 << (j - 1);
assert(j <= al || j <= bl);
BIGNUM *t = BN_CTX_get(ctx);
if (t == NULL) {
goto err;
}
if (al > j || bl > j) {
// We know |al| and |bl| are at most one from each other, so if al > j,
// bl >= j, and vice versa. Thus we can use |bn_mul_part_recursive|.
//
// TODO(davidben): This codepath is almost unused in standard
// algorithms. Is this optimization necessary? See notes in
// https://boringssl-review.googlesource.com/q/I0bd604e2cd6a75c266f64476c23a730ca1721ea6
assert(al >= j && bl >= j);
if (!bn_wexpand(t, j * 8) ||
!bn_wexpand(rr, j * 4)) {
goto err;
}
bn_mul_part_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
} else {
// al <= j && bl <= j. Additionally, we know j <= al or j <= bl, so one
// of al - j or bl - j is zero. The other, by the bound on |i| above, is
// zero or -1. Thus, we can use |bn_mul_recursive|.
if (!bn_wexpand(t, j * 4) ||
!bn_wexpand(rr, j * 2)) {
goto err;
}
bn_mul_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
}
rr->width = top;
goto end;
}
}
if (!bn_wexpand(rr, top)) {
goto err;
}
rr->width = top;
bn_mul_normal(rr->d, a->d, al, b->d, bl);
end:
if (r != rr && !BN_copy(r, rr)) {
goto err;
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
if (!bn_mul_impl(r, a, b, ctx)) {
return 0;
}
// This additionally fixes any negative zeros created by |bn_mul_impl|.
bn_set_minimal_width(r);
return 1;
}
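/*
 * Usage sketch for the public entry point above, using the prefixed headers
 * this package vendors (illustrative only; error handling abbreviated and
 * values chosen arbitrarily):
 */
#include <CBigNumBoringSSL_bn.h>
#include <CBigNumBoringSSL_mem.h>

static void bn_mul_usage_sketch(void) {
  BIGNUM *a = BN_new(), *b = BN_new(), *r = BN_new();
  BN_CTX *ctx = BN_CTX_new();
  BN_dec2bn(&a, "123456789012345678901234567890");
  BN_dec2bn(&b, "987654321098765432109876543210");
  if (BN_mul(r, a, b, ctx)) {
    char *s = BN_bn2dec(r);  // decimal string of the product
    OPENSSL_free(s);
  }
  BN_free(a);
  BN_free(b);
  BN_free(r);
  BN_CTX_free(ctx);
}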
int bn_mul_consttime(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
// Prevent negative zeros.
if (a->neg || b->neg) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
return bn_mul_impl(r, a, b, ctx);
}
void bn_mul_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a,
const BN_ULONG *b, size_t num_b) {
if (num_r != num_a + num_b) {
abort();
}
// TODO(davidben): Should this call |bn_mul_comba4| too? |BN_mul| does not
// hit that code.
if (num_a == 8 && num_b == 8) {
bn_mul_comba8(r, a, b);
} else {
bn_mul_normal(r, a, num_a, b, num_b);
}
}
// tmp must have 2*n words
static void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, size_t n,
BN_ULONG *tmp) {
if (n == 0) {
return;
}
size_t max = n * 2;
const BN_ULONG *ap = a;
BN_ULONG *rp = r;
rp[0] = rp[max - 1] = 0;
rp++;
// Compute the contribution of a[i] * a[j] for all i < j.
if (n > 1) {
ap++;
rp[n - 1] = bn_mul_words(rp, ap, n - 1, ap[-1]);
rp += 2;
}
if (n > 2) {
for (size_t i = n - 2; i > 0; i--) {
ap++;
rp[i] = bn_mul_add_words(rp, ap, i, ap[-1]);
rp += 2;
}
}
// The final result fits in |max| words, so none of the following operations
// will overflow.
// Double |r|, giving the contribution of a[i] * a[j] for all i != j.
bn_add_words(r, r, r, max);
// Add in the contribution of a[i] * a[i] for all i.
bn_sqr_words(tmp, a, n);
bn_add_words(r, r, tmp, max);
}
// bn_sqr_recursive sets |r| to |a|^2, using |t| as scratch space. |r| has
// length 2*|n2|, |a| has length |n2|, and |t| has length 4*|n2|. |n2| must be
// a power of two.
static void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, size_t n2,
BN_ULONG *t) {
// |n2| is a power of two.
assert(n2 != 0 && (n2 & (n2 - 1)) == 0);
if (n2 == 4) {
bn_sqr_comba4(r, a);
return;
}
if (n2 == 8) {
bn_sqr_comba8(r, a);
return;
}
if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL) {
bn_sqr_normal(r, a, n2, t);
return;
}
// Split |a| into a0,a1, each of size |n|.
// Split |t| into t0,t1,t2,t3, each of size |n|, with the remaining 4*|n| used
// for recursive calls.
// Split |r| into r0,r1,r2,r3. We must contribute a0^2 to r0,r1, 2*a0*a1 to
// r1,r2, and a1^2 to r2,r3.
size_t n = n2 / 2;
BN_ULONG *t_recursive = &t[n2 * 2];
// t0 = |a0 - a1|.
bn_abs_sub_words(t, a, &a[n], n, &t[n]);
// t2,t3 = t0^2 = |a0 - a1|^2 = a0^2 - 2*a0*a1 + a1^2
bn_sqr_recursive(&t[n2], t, n, t_recursive);
// r0,r1 = a0^2
bn_sqr_recursive(r, a, n, t_recursive);
// r2,r3 = a1^2
bn_sqr_recursive(&r[n2], &a[n], n, t_recursive);
// t0,t1,c = r0,r1 + r2,r3 = a0^2 + a1^2
BN_ULONG c = bn_add_words(t, r, &r[n2], n2);
// t2,t3,c = t0,t1,c - t2,t3 = 2*a0*a1
c -= bn_sub_words(&t[n2], t, &t[n2], n2);
// We now have our three components. Add them together.
// r1,r2,c = r1,r2 + t2,t3,c
c += bn_add_words(&r[n], &r[n], &t[n2], n2);
// Propagate the carry bit to the end.
for (size_t i = n + n2; i < n2 + n2; i++) {
BN_ULONG old = r[i];
r[i] = old + c;
c = r[i] < old;
}
// The square should fit without carries.
assert(c == 0);
}
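/*
 * The squaring identity used above can likewise be checked in isolation. A
 * minimal sketch with 32-bit halves of a 64-bit input (not part of
 * BoringSSL; assumes a compiler with unsigned __int128, e.g. GCC or Clang):
 *
 *   a = a1*2^32 + a0
 *   a^2 = a1^2*2^64 + (a0^2 + a1^2 - (a0 - a1)^2)*2^32 + a0^2
 */
#include <assert.h>
#include <stdint.h>

static unsigned __int128 sqr64_sketch(uint64_t a) {
  uint64_t a0 = (uint32_t)a, a1 = a >> 32;
  uint64_t lo = a0 * a0;
  uint64_t hi = a1 * a1;
  uint64_t d = a0 > a1 ? a0 - a1 : a1 - a0;  // |a0 - a1|
  // 2*a0*a1 = a0^2 + a1^2 - (a0 - a1)^2, which is always non-negative.
  unsigned __int128 middle =
      (unsigned __int128)lo + hi - (unsigned __int128)d * d;
  return ((unsigned __int128)hi << 64) + (middle << 32) + lo;
}

static void sqr64_sketch_check(void) {
  uint64_t a = 0xfedcba9876543210u;
  assert(sqr64_sketch(a) == (unsigned __int128)a * a);
}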
int BN_mul_word(BIGNUM *bn, BN_ULONG w) {
if (!bn->width) {
return 1;
}
if (w == 0) {
BN_zero(bn);
return 1;
}
BN_ULONG ll = bn_mul_words(bn->d, bn->d, bn->width, w);
if (ll) {
if (!bn_wexpand(bn, bn->width + 1)) {
return 0;
}
bn->d[bn->width++] = ll;
}
return 1;
}
int bn_sqr_consttime(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) {
int al = a->width;
if (al <= 0) {
r->width = 0;
r->neg = 0;
return 1;
}
int ret = 0;
BN_CTX_start(ctx);
BIGNUM *rr = (a != r) ? r : BN_CTX_get(ctx);
BIGNUM *tmp = BN_CTX_get(ctx);
if (!rr || !tmp) {
goto err;
}
int max = 2 * al; // Non-zero (from above)
if (!bn_wexpand(rr, max)) {
goto err;
}
if (al == 4) {
bn_sqr_comba4(rr->d, a->d);
} else if (al == 8) {
bn_sqr_comba8(rr->d, a->d);
} else {
if (al < BN_SQR_RECURSIVE_SIZE_NORMAL) {
BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL * 2];
bn_sqr_normal(rr->d, a->d, al, t);
} else {
// If |al| is a power of two, we can use |bn_sqr_recursive|.
if (al != 0 && (al & (al - 1)) == 0) {
if (!bn_wexpand(tmp, al * 4)) {
goto err;
}
bn_sqr_recursive(rr->d, a->d, al, tmp->d);
} else {
if (!bn_wexpand(tmp, max)) {
goto err;
}
bn_sqr_normal(rr->d, a->d, al, tmp->d);
}
}
}
rr->neg = 0;
rr->width = max;
if (rr != r && !BN_copy(r, rr)) {
goto err;
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) {
if (!bn_sqr_consttime(r, a, ctx)) {
return 0;
}
bn_set_minimal_width(r);
return 1;
}
void bn_sqr_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, size_t num_a) {
if (num_r != 2 * num_a || num_a > BN_SMALL_MAX_WORDS) {
abort();
}
if (num_a == 4) {
bn_sqr_comba4(r, a);
} else if (num_a == 8) {
bn_sqr_comba8(r, a);
} else {
BN_ULONG tmp[2 * BN_SMALL_MAX_WORDS];
bn_sqr_normal(r, a, num_a, tmp);
OPENSSL_cleanse(tmp, 2 * num_a * sizeof(BN_ULONG));
}
}

File diff suppressed because it is too large

View File

@ -0,0 +1,341 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.]
*/
/* ====================================================================
* Copyright (c) 1998-2001 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_bn.h>
#include <limits.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_rand.h>
#include <CBigNumBoringSSL_type_check.h>
#include "internal.h"
#include "../../internal.h"
#include "../rand/internal.h"
int BN_rand(BIGNUM *rnd, int bits, int top, int bottom) {
if (rnd == NULL) {
return 0;
}
if (top != BN_RAND_TOP_ANY && top != BN_RAND_TOP_ONE &&
top != BN_RAND_TOP_TWO) {
OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
return 0;
}
if (bottom != BN_RAND_BOTTOM_ANY && bottom != BN_RAND_BOTTOM_ODD) {
OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
return 0;
}
if (bits == 0) {
BN_zero(rnd);
return 1;
}
if (bits > INT_MAX - (BN_BITS2 - 1)) {
OPENSSL_PUT_ERROR(BN, BN_R_BIGNUM_TOO_LONG);
return 0;
}
int words = (bits + BN_BITS2 - 1) / BN_BITS2;
int bit = (bits - 1) % BN_BITS2;
const BN_ULONG kOne = 1;
const BN_ULONG kThree = 3;
BN_ULONG mask = bit < BN_BITS2 - 1 ? (kOne << (bit + 1)) - 1 : BN_MASK2;
if (!bn_wexpand(rnd, words)) {
return 0;
}
RAND_bytes((uint8_t *)rnd->d, words * sizeof(BN_ULONG));
rnd->d[words - 1] &= mask;
if (top != BN_RAND_TOP_ANY) {
if (top == BN_RAND_TOP_TWO && bits > 1) {
if (bit == 0) {
rnd->d[words - 1] |= 1;
rnd->d[words - 2] |= kOne << (BN_BITS2 - 1);
} else {
rnd->d[words - 1] |= kThree << (bit - 1);
}
} else {
rnd->d[words - 1] |= kOne << bit;
}
}
if (bottom == BN_RAND_BOTTOM_ODD) {
rnd->d[0] |= 1;
}
rnd->neg = 0;
rnd->width = words;
return 1;
}
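/*
 * Usage sketch: a 256-bit random value with the top two bits set and the low
 * bit forced odd, the typical shape of a prime candidate (illustrative only;
 * uses the prefixed header this package vendors):
 */
#include <CBigNumBoringSSL_bn.h>

static int bn_rand_usage_sketch(BIGNUM *out) {
  return BN_rand(out, 256, BN_RAND_TOP_TWO, BN_RAND_BOTTOM_ODD);
}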
int BN_pseudo_rand(BIGNUM *rnd, int bits, int top, int bottom) {
return BN_rand(rnd, bits, top, bottom);
}
// bn_less_than_word_mask returns a mask of all ones if the number represented
// by |len| words at |a| is less than |b| and zero otherwise. It performs this
// computation in time independent of the value of |a|. |b| is assumed public.
static crypto_word_t bn_less_than_word_mask(const BN_ULONG *a, size_t len,
BN_ULONG b) {
if (b == 0) {
return CONSTTIME_FALSE_W;
}
if (len == 0) {
return CONSTTIME_TRUE_W;
}
// |a| < |b| iff a[1..len-1] are all zero and a[0] < b.
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
crypto_word_t mask = 0;
for (size_t i = 1; i < len; i++) {
mask |= a[i];
}
// |mask| is now zero iff a[1..len-1] are all zero.
mask = constant_time_is_zero_w(mask);
mask &= constant_time_lt_w(a[0], b);
return mask;
}
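/*
 * The |constant_time_lt_w| building block used above reduces to a well-known
 * branch-free formula: the borrow out of a - b is 1 exactly when a < b, and it
 * can be recovered from the sign bits without branching and then smeared into
 * an all-ones mask. A standalone 64-bit sketch (illustrative only, not the
 * BoringSSL helper itself):
 */
#include <stdint.h>

static uint64_t ct_lt_mask_sketch(uint64_t a, uint64_t b) {
  // All ones if a < b, zero otherwise.
  return 0 - ((a ^ ((a ^ b) | ((a - b) ^ b))) >> 63);
}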
int bn_in_range_words(const BN_ULONG *a, BN_ULONG min_inclusive,
const BN_ULONG *max_exclusive, size_t len) {
crypto_word_t mask = ~bn_less_than_word_mask(a, len, min_inclusive);
return mask & bn_less_than_words(a, max_exclusive, len);
}
static int bn_range_to_mask(size_t *out_words, BN_ULONG *out_mask,
size_t min_inclusive, const BN_ULONG *max_exclusive,
size_t len) {
// The magnitude of |max_exclusive| is assumed public.
size_t words = len;
while (words > 0 && max_exclusive[words - 1] == 0) {
words--;
}
if (words == 0 ||
(words == 1 && max_exclusive[0] <= min_inclusive)) {
OPENSSL_PUT_ERROR(BN, BN_R_INVALID_RANGE);
return 0;
}
BN_ULONG mask = max_exclusive[words - 1];
// This sets all bits in |mask| below the most significant bit.
mask |= mask >> 1;
mask |= mask >> 2;
mask |= mask >> 4;
mask |= mask >> 8;
mask |= mask >> 16;
#if defined(OPENSSL_64_BIT)
mask |= mask >> 32;
#endif
*out_words = words;
*out_mask = mask;
return 1;
}
int bn_rand_range_words(BN_ULONG *out, BN_ULONG min_inclusive,
const BN_ULONG *max_exclusive, size_t len,
const uint8_t additional_data[32]) {
// This function implements the equivalent of steps 4 through 7 of FIPS 186-4
// appendices B.4.2 and B.5.2. When called in those contexts, |max_exclusive|
// is n and |min_inclusive| is one.
// Compute the bit length of |max_exclusive| (step 1), in terms of a number of
// |words| worth of entropy to fill and a mask of bits to clear in the top
// word.
size_t words;
BN_ULONG mask;
if (!bn_range_to_mask(&words, &mask, min_inclusive, max_exclusive, len)) {
return 0;
}
// Fill any unused words with zero.
OPENSSL_memset(out + words, 0, (len - words) * sizeof(BN_ULONG));
unsigned count = 100;
do {
if (!--count) {
OPENSSL_PUT_ERROR(BN, BN_R_TOO_MANY_ITERATIONS);
return 0;
}
// Steps 4 and 5. Use |words| and |mask| together to obtain a string of N
// bits, where N is the bit length of |max_exclusive|.
RAND_bytes_with_additional_data((uint8_t *)out, words * sizeof(BN_ULONG),
additional_data);
out[words - 1] &= mask;
// If out >= max_exclusive or out < min_inclusive, retry. This implements
// the equivalent of steps 6 and 7 without leaking the value of |out|.
} while (!bn_in_range_words(out, min_inclusive, max_exclusive, words));
return 1;
}
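/*
 * The loop above is rejection sampling: draw a value masked to the bit length
 * of the upper bound and retry until it lands in range. A scalar sketch of
 * the same idea for 64-bit bounds (illustrative only; |rand64| is a
 * hypothetical uniform source, not a BoringSSL function, and the caller is
 * assumed to pass min_inclusive < max_exclusive with max_exclusive > 0):
 */
#include <stdint.h>

static uint64_t rand_range_sketch(uint64_t min_inclusive,
                                  uint64_t max_exclusive,
                                  uint64_t (*rand64)(void)) {
  // Mask covering every bit position used by max_exclusive - 1, mirroring
  // |bn_range_to_mask| above.
  uint64_t mask = max_exclusive - 1;
  mask |= mask >> 1;
  mask |= mask >> 2;
  mask |= mask >> 4;
  mask |= mask >> 8;
  mask |= mask >> 16;
  mask |= mask >> 32;
  uint64_t v;
  do {
    v = rand64() & mask;
  } while (v < min_inclusive || v >= max_exclusive);
  return v;
}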
int BN_rand_range_ex(BIGNUM *r, BN_ULONG min_inclusive,
const BIGNUM *max_exclusive) {
static const uint8_t kDefaultAdditionalData[32] = {0};
if (!bn_wexpand(r, max_exclusive->width) ||
!bn_rand_range_words(r->d, min_inclusive, max_exclusive->d,
max_exclusive->width, kDefaultAdditionalData)) {
return 0;
}
r->neg = 0;
r->width = max_exclusive->width;
return 1;
}
int bn_rand_secret_range(BIGNUM *r, int *out_is_uniform, BN_ULONG min_inclusive,
const BIGNUM *max_exclusive) {
size_t words;
BN_ULONG mask;
if (!bn_range_to_mask(&words, &mask, min_inclusive, max_exclusive->d,
max_exclusive->width) ||
!bn_wexpand(r, words)) {
return 0;
}
assert(words > 0);
assert(mask != 0);
// The range must be large enough for bit tricks to fix invalid values.
if (words == 1 && min_inclusive > mask >> 1) {
OPENSSL_PUT_ERROR(BN, BN_R_INVALID_RANGE);
return 0;
}
// Select a uniform random number with num_bits(max_exclusive) bits.
RAND_bytes((uint8_t *)r->d, words * sizeof(BN_ULONG));
r->d[words - 1] &= mask;
// Check, in constant-time, if the value is in range.
*out_is_uniform =
bn_in_range_words(r->d, min_inclusive, max_exclusive->d, words);
crypto_word_t in_range = *out_is_uniform;
in_range = 0 - in_range;
// If the value is not in range, force it to be in range.
r->d[0] |= constant_time_select_w(in_range, 0, min_inclusive);
r->d[words - 1] &= constant_time_select_w(in_range, BN_MASK2, mask >> 1);
assert(bn_in_range_words(r->d, min_inclusive, max_exclusive->d, words));
r->neg = 0;
r->width = words;
return 1;
}
int BN_rand_range(BIGNUM *r, const BIGNUM *range) {
return BN_rand_range_ex(r, 0, range);
}
int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range) {
return BN_rand_range(r, range);
}

View File

@ -0,0 +1,226 @@
/*
* Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
* Copyright (c) 2012, Intel Corporation. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*
* Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
* (1) Intel Corporation, Israel Development Center, Haifa, Israel
* (2) University of Haifa, Israel
*/
#include "rsaz_exp.h"
#if defined(RSAZ_ENABLED)
#include <CBigNumBoringSSL_mem.h>
#include "internal.h"
#include "../../internal.h"
// one is 1 in RSAZ's representation.
alignas(64) static const BN_ULONG one[40] = {
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
// two80 is 2^80 in RSAZ's representation. Note RSAZ uses base 2^29, so this is
// 2^(29*2 + 22) = 2^80, not 2^(64*2 + 22).
alignas(64) static const BN_ULONG two80[40] = {
0, 0, 1 << 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
void RSAZ_1024_mod_exp_avx2(BN_ULONG result_norm[16],
const BN_ULONG base_norm[16],
const BN_ULONG exponent[16],
const BN_ULONG m_norm[16], const BN_ULONG RR[16],
BN_ULONG k0,
BN_ULONG storage[MOD_EXP_CTIME_STORAGE_LEN]) {
OPENSSL_STATIC_ASSERT(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH % 64 == 0,
"MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH is too small");
assert((uintptr_t)storage % 64 == 0);
BN_ULONG *a_inv, *m, *result, *table_s = storage + 40 * 3, *R2 = table_s;
// Note |R2| aliases |table_s|.
if (((((uintptr_t)storage & 4095) + 320) >> 12) != 0) {
result = storage;
a_inv = storage + 40;
m = storage + 40 * 2; // should not cross page
} else {
m = storage; // should not cross page
result = storage + 40;
a_inv = storage + 40 * 2;
}
rsaz_1024_norm2red_avx2(m, m_norm);
rsaz_1024_norm2red_avx2(a_inv, base_norm);
rsaz_1024_norm2red_avx2(R2, RR);
// Convert |R2| from the usual radix, giving R = 2^1024, to RSAZ's radix,
// giving R = 2^(36*29) = 2^1044.
rsaz_1024_mul_avx2(R2, R2, R2, m, k0);
// R2 = 2^2048 * 2^2048 / 2^1044 = 2^3052
rsaz_1024_mul_avx2(R2, R2, two80, m, k0);
// R2 = 2^3052 * 2^80 / 2^1044 = 2^2088 = (2^1044)^2
// table[0] = 1
rsaz_1024_mul_avx2(result, R2, one, m, k0);
// table[1] = a_inv^1
rsaz_1024_mul_avx2(a_inv, a_inv, R2, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 0);
rsaz_1024_scatter5_avx2(table_s, a_inv, 1);
// table[2] = a_inv^2
rsaz_1024_sqr_avx2(result, a_inv, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 2);
#if 0
// This is almost 2x smaller and less than 1% slower.
for (int index = 3; index < 32; index++) {
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, index);
}
#else
// table[4] = a_inv^4
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 4);
// table[8] = a_inv^8
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 8);
// table[16] = a_inv^16
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 16);
// table[17] = a_inv^17
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 17);
// table[3]
rsaz_1024_gather5_avx2(result, table_s, 2);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 3);
// table[6]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 6);
// table[12]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 12);
// table[24]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 24);
// table[25]
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 25);
// table[5]
rsaz_1024_gather5_avx2(result, table_s, 4);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 5);
// table[10]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 10);
// table[20]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 20);
// table[21]
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 21);
// table[7]
rsaz_1024_gather5_avx2(result, table_s, 6);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 7);
// table[14]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 14);
// table[28]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 28);
// table[29]
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 29);
// table[9]
rsaz_1024_gather5_avx2(result, table_s, 8);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 9);
// table[18]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 18);
// table[19]
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 19);
// table[11]
rsaz_1024_gather5_avx2(result, table_s, 10);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 11);
// table[22]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 22);
// table[23]
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 23);
// table[13]
rsaz_1024_gather5_avx2(result, table_s, 12);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 13);
// table[26]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 26);
// table[27]
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 27);
// table[15]
rsaz_1024_gather5_avx2(result, table_s, 14);
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 15);
// table[30]
rsaz_1024_sqr_avx2(result, result, m, k0, 1);
rsaz_1024_scatter5_avx2(table_s, result, 30);
// table[31]
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
rsaz_1024_scatter5_avx2(table_s, result, 31);
#endif
const uint8_t *p_str = (const uint8_t *)exponent;
// load first window
int wvalue = p_str[127] >> 3;
rsaz_1024_gather5_avx2(result, table_s, wvalue);
int index = 1014;
  while (index > -1) {  // Loop for the remaining 203 windows.
rsaz_1024_sqr_avx2(result, result, m, k0, 5);
uint16_t wvalue_16;
memcpy(&wvalue_16, &p_str[index / 8], sizeof(wvalue_16));
wvalue = wvalue_16;
wvalue = (wvalue >> (index % 8)) & 31;
index -= 5;
rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); // Borrow |a_inv|.
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
}
// Square four times.
rsaz_1024_sqr_avx2(result, result, m, k0, 4);
wvalue = p_str[0] & 15;
rsaz_1024_gather5_avx2(a_inv, table_s, wvalue); // Borrow |a_inv|.
rsaz_1024_mul_avx2(result, result, a_inv, m, k0);
// Convert from Montgomery.
rsaz_1024_mul_avx2(result, result, one, m, k0);
rsaz_1024_red2norm_avx2(result_norm, result);
OPENSSL_cleanse(storage, MOD_EXP_CTIME_STORAGE_LEN * sizeof(BN_ULONG));
}
#endif // RSAZ_ENABLED

View File

@ -0,0 +1,104 @@
/*
* Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved.
* Copyright (c) 2012, Intel Corporation. All Rights Reserved.
*
* Licensed under the OpenSSL license (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*
* Originally written by Shay Gueron (1, 2), and Vlad Krasnov (1)
* (1) Intel Corporation, Israel Development Center, Haifa, Israel
* (2) University of Haifa, Israel
*/
#ifndef OPENSSL_HEADER_BN_RSAZ_EXP_H
#define OPENSSL_HEADER_BN_RSAZ_EXP_H
#include <CBigNumBoringSSL_bn.h>
#include <CBigNumBoringSSL_cpu.h>
#include "internal.h"
#if defined(__cplusplus)
extern "C" {
#endif
#if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
#define RSAZ_ENABLED
// RSAZ_1024_mod_exp_avx2 sets |result| to |base_norm| raised to |exponent|
// modulo |m_norm|. |base_norm| must be fully-reduced and |exponent| must have
// the high bit set (it is 1024 bits wide). |RR| and |k0| must be |RR| and |n0|,
// respectively, extracted from |m_norm|'s |BN_MONT_CTX|. |storage_words| is a
// temporary buffer that must be aligned to |MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH|
// bytes.
void RSAZ_1024_mod_exp_avx2(BN_ULONG result[16], const BN_ULONG base_norm[16],
const BN_ULONG exponent[16],
const BN_ULONG m_norm[16], const BN_ULONG RR[16],
BN_ULONG k0,
BN_ULONG storage_words[MOD_EXP_CTIME_STORAGE_LEN]);
OPENSSL_INLINE int rsaz_avx2_capable(void) {
const uint32_t *cap = OPENSSL_ia32cap_get();
return (cap[2] & (1 << 5)) != 0; // AVX2
}
OPENSSL_INLINE int rsaz_avx2_preferred(void) {
const uint32_t *cap = OPENSSL_ia32cap_get();
static const uint32_t kBMI2AndADX = (1 << 8) | (1 << 19);
if ((cap[2] & kBMI2AndADX) == kBMI2AndADX) {
// If BMI2 and ADX are available, x86_64-mont5.pl is faster.
return 0;
}
return (cap[2] & (1 << 5)) != 0; // AVX2
}
// Assembly functions.
// RSAZ represents 1024-bit integers using unsaturated 29-bit limbs stored in
// 64-bit integers. This requires 36 limbs but padded up to 40.
//
// See crypto/bn/asm/rsaz-avx2.pl for further details.
// rsaz_1024_norm2red_avx2 converts |norm| from |BIGNUM| to RSAZ representation
// and writes the result to |red|.
void rsaz_1024_norm2red_avx2(BN_ULONG red[40], const BN_ULONG norm[16]);
// rsaz_1024_mul_avx2 computes |a| * |b| mod |n| and writes the result to |ret|.
// Inputs and outputs are in Montgomery form, using RSAZ's representation. |k|
// is -|n|^-1 mod 2^64 or |n0| from |BN_MONT_CTX|.
void rsaz_1024_mul_avx2(BN_ULONG ret[40], const BN_ULONG a[40],
const BN_ULONG b[40], const BN_ULONG n[40], BN_ULONG k);
// rsaz_1024_sqr_avx2 computes |a|^(2^|count|) mod |n| and writes the result to
// |ret|. Inputs and outputs are in Montgomery form, using RSAZ's
// representation. |k| is -|n|^-1 mod 2^64 or |n0| from |BN_MONT_CTX|.
void rsaz_1024_sqr_avx2(BN_ULONG ret[40], const BN_ULONG a[40],
const BN_ULONG n[40], BN_ULONG k, int count);
// rsaz_1024_scatter5_avx2 stores |val| at index |i| of |tbl|. |i| must be
// positive and at most 31. Note the table only uses 18 |BN_ULONG|s per entry
// instead of 40. It packs two 29-bit limbs into each |BN_ULONG| and only stores
// 36 limbs rather than the padded 40.
void rsaz_1024_scatter5_avx2(BN_ULONG tbl[32 * 18], const BN_ULONG val[40],
int i);
// rsaz_1024_gather5_avx2 loads index |i| of |tbl| and writes it to |val|.
void rsaz_1024_gather5_avx2(BN_ULONG val[40], const BN_ULONG tbl[32 * 18],
int i);
// rsaz_1024_red2norm_avx2 converts |red| from RSAZ to |BIGNUM| representation
// and writes the result to |norm|.
void rsaz_1024_red2norm_avx2(BN_ULONG norm[16], const BN_ULONG red[40]);
#endif // !OPENSSL_NO_ASM && OPENSSL_X86_64
#if defined(__cplusplus)
} // extern "C"
#endif
#endif // OPENSSL_HEADER_BN_RSAZ_EXP_H

View File

@ -0,0 +1,364 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_bn.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_type_check.h>
#include "internal.h"
int BN_lshift(BIGNUM *r, const BIGNUM *a, int n) {
int i, nw, lb, rb;
BN_ULONG *t, *f;
BN_ULONG l;
if (n < 0) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
r->neg = a->neg;
nw = n / BN_BITS2;
if (!bn_wexpand(r, a->width + nw + 1)) {
return 0;
}
lb = n % BN_BITS2;
rb = BN_BITS2 - lb;
f = a->d;
t = r->d;
t[a->width + nw] = 0;
if (lb == 0) {
for (i = a->width - 1; i >= 0; i--) {
t[nw + i] = f[i];
}
} else {
for (i = a->width - 1; i >= 0; i--) {
l = f[i];
t[nw + i + 1] |= l >> rb;
t[nw + i] = l << lb;
}
}
OPENSSL_memset(t, 0, nw * sizeof(t[0]));
r->width = a->width + nw + 1;
bn_set_minimal_width(r);
return 1;
}
int BN_lshift1(BIGNUM *r, const BIGNUM *a) {
BN_ULONG *ap, *rp, t, c;
int i;
if (r != a) {
r->neg = a->neg;
if (!bn_wexpand(r, a->width + 1)) {
return 0;
}
r->width = a->width;
} else {
if (!bn_wexpand(r, a->width + 1)) {
return 0;
}
}
ap = a->d;
rp = r->d;
c = 0;
for (i = 0; i < a->width; i++) {
t = *(ap++);
*(rp++) = (t << 1) | c;
c = t >> (BN_BITS2 - 1);
}
if (c) {
*rp = 1;
r->width++;
}
return 1;
}
void bn_rshift_words(BN_ULONG *r, const BN_ULONG *a, unsigned shift,
size_t num) {
unsigned shift_bits = shift % BN_BITS2;
size_t shift_words = shift / BN_BITS2;
if (shift_words >= num) {
OPENSSL_memset(r, 0, num * sizeof(BN_ULONG));
return;
}
if (shift_bits == 0) {
OPENSSL_memmove(r, a + shift_words, (num - shift_words) * sizeof(BN_ULONG));
} else {
for (size_t i = shift_words; i < num - 1; i++) {
r[i - shift_words] =
(a[i] >> shift_bits) | (a[i + 1] << (BN_BITS2 - shift_bits));
}
r[num - 1 - shift_words] = a[num - 1] >> shift_bits;
}
OPENSSL_memset(r + num - shift_words, 0, shift_words * sizeof(BN_ULONG));
}
int BN_rshift(BIGNUM *r, const BIGNUM *a, int n) {
if (n < 0) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
if (!bn_wexpand(r, a->width)) {
return 0;
}
bn_rshift_words(r->d, a->d, n, a->width);
r->neg = a->neg;
r->width = a->width;
bn_set_minimal_width(r);
return 1;
}
int bn_rshift_secret_shift(BIGNUM *r, const BIGNUM *a, unsigned n,
BN_CTX *ctx) {
int ret = 0;
BN_CTX_start(ctx);
BIGNUM *tmp = BN_CTX_get(ctx);
if (tmp == NULL ||
!BN_copy(r, a) ||
!bn_wexpand(tmp, r->width)) {
goto err;
}
// Shift conditionally by powers of two.
unsigned max_bits = BN_BITS2 * r->width;
for (unsigned i = 0; (max_bits >> i) != 0; i++) {
BN_ULONG mask = (n >> i) & 1;
mask = 0 - mask;
bn_rshift_words(tmp->d, r->d, 1u << i, r->width);
bn_select_words(r->d, mask, tmp->d /* apply shift */,
r->d /* ignore shift */, r->width);
}
ret = 1;
err:
BN_CTX_end(ctx);
return ret;
}
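/*
 * The same trick at word scale: apply each power-of-two shift unconditionally
 * and select the result with a mask, so the sequence of operations never
 * depends on the secret shift amount. A standalone 64-bit sketch
 * (illustrative only; |n| is assumed to be in 0..63):
 */
#include <stdint.h>

static uint64_t rshift_secret_sketch(uint64_t x, unsigned n) {
  for (unsigned i = 0; i < 6; i++) {  // shifts of 1, 2, 4, 8, 16, 32
    uint64_t mask = 0 - (uint64_t)((n >> i) & 1);
    // Keep the shifted value if bit i of |n| is set, otherwise keep |x|.
    x = (mask & (x >> (1u << i))) | (~mask & x);
  }
  return x;
}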
void bn_rshift1_words(BN_ULONG *r, const BN_ULONG *a, size_t num) {
if (num == 0) {
return;
}
for (size_t i = 0; i < num - 1; i++) {
r[i] = (a[i] >> 1) | (a[i + 1] << (BN_BITS2 - 1));
}
r[num - 1] = a[num - 1] >> 1;
}
int BN_rshift1(BIGNUM *r, const BIGNUM *a) {
if (!bn_wexpand(r, a->width)) {
return 0;
}
bn_rshift1_words(r->d, a->d, a->width);
r->width = a->width;
r->neg = a->neg;
bn_set_minimal_width(r);
return 1;
}
int BN_set_bit(BIGNUM *a, int n) {
if (n < 0) {
return 0;
}
int i = n / BN_BITS2;
int j = n % BN_BITS2;
if (a->width <= i) {
if (!bn_wexpand(a, i + 1)) {
return 0;
}
for (int k = a->width; k < i + 1; k++) {
a->d[k] = 0;
}
a->width = i + 1;
}
a->d[i] |= (((BN_ULONG)1) << j);
return 1;
}
int BN_clear_bit(BIGNUM *a, int n) {
int i, j;
if (n < 0) {
return 0;
}
i = n / BN_BITS2;
j = n % BN_BITS2;
if (a->width <= i) {
return 0;
}
a->d[i] &= (~(((BN_ULONG)1) << j));
bn_set_minimal_width(a);
return 1;
}
int bn_is_bit_set_words(const BN_ULONG *a, size_t num, unsigned bit) {
unsigned i = bit / BN_BITS2;
unsigned j = bit % BN_BITS2;
if (i >= num) {
return 0;
}
return (a[i] >> j) & 1;
}
int BN_is_bit_set(const BIGNUM *a, int n) {
if (n < 0) {
return 0;
}
return bn_is_bit_set_words(a->d, a->width, n);
}
int BN_mask_bits(BIGNUM *a, int n) {
if (n < 0) {
return 0;
}
int w = n / BN_BITS2;
int b = n % BN_BITS2;
if (w >= a->width) {
return 1;
}
if (b == 0) {
a->width = w;
} else {
a->width = w + 1;
a->d[w] &= ~(BN_MASK2 << b);
}
bn_set_minimal_width(a);
return 1;
}
static int bn_count_low_zero_bits_word(BN_ULONG l) {
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
OPENSSL_STATIC_ASSERT(sizeof(int) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
OPENSSL_STATIC_ASSERT(BN_BITS2 == sizeof(BN_ULONG) * 8,
"BN_ULONG has padding bits");
// C has very bizarre rules for types smaller than an int.
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) >= sizeof(int),
"BN_ULONG gets promoted to int");
crypto_word_t mask;
int bits = 0;
#if BN_BITS2 > 32
// Check if the lower half of |x| are all zero.
mask = constant_time_is_zero_w(l << (BN_BITS2 - 32));
// If the lower half is all zeros, it is included in the bit count and we
// count the upper half. Otherwise, we count the lower half.
bits += 32 & mask;
l = constant_time_select_w(mask, l >> 32, l);
#endif
// The remaining blocks are analogous iterations at lower powers of two.
mask = constant_time_is_zero_w(l << (BN_BITS2 - 16));
bits += 16 & mask;
l = constant_time_select_w(mask, l >> 16, l);
mask = constant_time_is_zero_w(l << (BN_BITS2 - 8));
bits += 8 & mask;
l = constant_time_select_w(mask, l >> 8, l);
mask = constant_time_is_zero_w(l << (BN_BITS2 - 4));
bits += 4 & mask;
l = constant_time_select_w(mask, l >> 4, l);
mask = constant_time_is_zero_w(l << (BN_BITS2 - 2));
bits += 2 & mask;
l = constant_time_select_w(mask, l >> 2, l);
mask = constant_time_is_zero_w(l << (BN_BITS2 - 1));
bits += 1 & mask;
return bits;
}
int BN_count_low_zero_bits(const BIGNUM *bn) {
OPENSSL_STATIC_ASSERT(sizeof(BN_ULONG) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
OPENSSL_STATIC_ASSERT(sizeof(int) <= sizeof(crypto_word_t),
"crypto_word_t is too small");
int ret = 0;
crypto_word_t saw_nonzero = 0;
for (int i = 0; i < bn->width; i++) {
crypto_word_t nonzero = ~constant_time_is_zero_w(bn->d[i]);
crypto_word_t first_nonzero = ~saw_nonzero & nonzero;
saw_nonzero |= nonzero;
int bits = bn_count_low_zero_bits_word(bn->d[i]);
ret |= first_nonzero & (i * BN_BITS2 + bits);
}
  // If we got to the end of |bn| and saw no non-zero words, |bn| is zero.
  // |ret| will then remain zero.
return ret;
}

View File

@ -0,0 +1,502 @@
/* Written by Lenka Fibikova <fibikova@exp-math.uni-essen.de>
* and Bodo Moeller for the OpenSSL project. */
/* ====================================================================
* Copyright (c) 1998-2000 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ====================================================================
*
* This product includes cryptographic software written by Eric Young
* (eay@cryptsoft.com). This product includes software written by Tim
* Hudson (tjh@cryptsoft.com). */
#include <CBigNumBoringSSL_bn.h>
#include <CBigNumBoringSSL_err.h>
#include "internal.h"
BIGNUM *BN_mod_sqrt(BIGNUM *in, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
// Compute a square root of |a| mod |p| using the Tonelli/Shanks algorithm
// (cf. Henri Cohen, "A Course in Algebraic Computational Number Theory",
// algorithm 1.5.1). |p| is assumed to be a prime.
BIGNUM *ret = in;
int err = 1;
int r;
BIGNUM *A, *b, *q, *t, *x, *y;
int e, i, j;
if (!BN_is_odd(p) || BN_abs_is_word(p, 1)) {
if (BN_abs_is_word(p, 2)) {
if (ret == NULL) {
ret = BN_new();
}
if (ret == NULL) {
goto end;
}
if (!BN_set_word(ret, BN_is_bit_set(a, 0))) {
if (ret != in) {
BN_free(ret);
}
return NULL;
}
return ret;
}
OPENSSL_PUT_ERROR(BN, BN_R_P_IS_NOT_PRIME);
return (NULL);
}
if (BN_is_zero(a) || BN_is_one(a)) {
if (ret == NULL) {
ret = BN_new();
}
if (ret == NULL) {
goto end;
}
if (!BN_set_word(ret, BN_is_one(a))) {
if (ret != in) {
BN_free(ret);
}
return NULL;
}
return ret;
}
BN_CTX_start(ctx);
A = BN_CTX_get(ctx);
b = BN_CTX_get(ctx);
q = BN_CTX_get(ctx);
t = BN_CTX_get(ctx);
x = BN_CTX_get(ctx);
y = BN_CTX_get(ctx);
if (y == NULL) {
goto end;
}
if (ret == NULL) {
ret = BN_new();
}
if (ret == NULL) {
goto end;
}
// A = a mod p
if (!BN_nnmod(A, a, p, ctx)) {
goto end;
}
// now write |p| - 1 as 2^e*q where q is odd
e = 1;
while (!BN_is_bit_set(p, e)) {
e++;
}
// we'll set q later (if needed)
if (e == 1) {
// The easy case: (|p|-1)/2 is odd, so 2 has an inverse
// modulo (|p|-1)/2, and square roots can be computed
// directly by modular exponentiation.
// We have
// 2 * (|p|+1)/4 == 1 (mod (|p|-1)/2),
// so we can use exponent (|p|+1)/4, i.e. (|p|-3)/4 + 1.
if (!BN_rshift(q, p, 2)) {
goto end;
}
q->neg = 0;
if (!BN_add_word(q, 1) ||
!BN_mod_exp_mont(ret, A, q, p, ctx, NULL)) {
goto end;
}
err = 0;
goto vrfy;
}
if (e == 2) {
// |p| == 5 (mod 8)
//
// In this case 2 is always a non-square since
// Legendre(2,p) = (-1)^((p^2-1)/8) for any odd prime.
// So if a really is a square, then 2*a is a non-square.
// Thus for
// b := (2*a)^((|p|-5)/8),
// i := (2*a)*b^2
// we have
// i^2 = (2*a)^((1 + (|p|-5)/4)*2)
// = (2*a)^((p-1)/2)
// = -1;
// so if we set
// x := a*b*(i-1),
// then
// x^2 = a^2 * b^2 * (i^2 - 2*i + 1)
// = a^2 * b^2 * (-2*i)
// = a*(-i)*(2*a*b^2)
// = a*(-i)*i
// = a.
//
// (This is due to A.O.L. Atkin,
// <URL:
//http://listserv.nodak.edu/scripts/wa.exe?A2=ind9211&L=nmbrthry&O=T&P=562>,
// November 1992.)
// t := 2*a
if (!bn_mod_lshift1_consttime(t, A, p, ctx)) {
goto end;
}
// b := (2*a)^((|p|-5)/8)
if (!BN_rshift(q, p, 3)) {
goto end;
}
q->neg = 0;
if (!BN_mod_exp_mont(b, t, q, p, ctx, NULL)) {
goto end;
}
// y := b^2
if (!BN_mod_sqr(y, b, p, ctx)) {
goto end;
}
// t := (2*a)*b^2 - 1
if (!BN_mod_mul(t, t, y, p, ctx) ||
!BN_sub_word(t, 1)) {
goto end;
}
// x = a*b*t
if (!BN_mod_mul(x, A, b, p, ctx) ||
!BN_mod_mul(x, x, t, p, ctx)) {
goto end;
}
if (!BN_copy(ret, x)) {
goto end;
}
err = 0;
goto vrfy;
}
// e > 2, so we really have to use the Tonelli/Shanks algorithm.
// First, find some y that is not a square.
if (!BN_copy(q, p)) {
goto end; // use 'q' as temp
}
q->neg = 0;
i = 2;
do {
// For efficiency, try small numbers first;
// if this fails, try random numbers.
if (i < 22) {
if (!BN_set_word(y, i)) {
goto end;
}
} else {
if (!BN_pseudo_rand(y, BN_num_bits(p), 0, 0)) {
goto end;
}
if (BN_ucmp(y, p) >= 0) {
if (!(p->neg ? BN_add : BN_sub)(y, y, p)) {
goto end;
}
}
// now 0 <= y < |p|
if (BN_is_zero(y)) {
if (!BN_set_word(y, i)) {
goto end;
}
}
}
r = bn_jacobi(y, q, ctx); // here 'q' is |p|
if (r < -1) {
goto end;
}
if (r == 0) {
// m divides p
OPENSSL_PUT_ERROR(BN, BN_R_P_IS_NOT_PRIME);
goto end;
}
} while (r == 1 && ++i < 82);
if (r != -1) {
// Many rounds and still no non-square -- this is more likely
// a bug than just bad luck.
// Even if p is not prime, we should have found some y
// such that r == -1.
OPENSSL_PUT_ERROR(BN, BN_R_TOO_MANY_ITERATIONS);
goto end;
}
// Here's our actual 'q':
if (!BN_rshift(q, q, e)) {
goto end;
}
// Now that we have some non-square, we can find an element
// of order 2^e by computing its q'th power.
if (!BN_mod_exp_mont(y, y, q, p, ctx, NULL)) {
goto end;
}
if (BN_is_one(y)) {
OPENSSL_PUT_ERROR(BN, BN_R_P_IS_NOT_PRIME);
goto end;
}
// Now we know that (if p is indeed prime) there is an integer
// k, 0 <= k < 2^e, such that
//
// a^q * y^k == 1 (mod p).
//
// As a^q is a square and y is not, k must be even.
// q+1 is even, too, so there is an element
//
// X := a^((q+1)/2) * y^(k/2),
//
// and it satisfies
//
// X^2 = a^q * a * y^k
// = a,
//
// so it is the square root that we are looking for.
// t := (q-1)/2 (note that q is odd)
if (!BN_rshift1(t, q)) {
goto end;
}
// x := a^((q-1)/2)
if (BN_is_zero(t)) // special case: p = 2^e + 1
{
if (!BN_nnmod(t, A, p, ctx)) {
goto end;
}
if (BN_is_zero(t)) {
// special case: a == 0 (mod p)
BN_zero(ret);
err = 0;
goto end;
} else if (!BN_one(x)) {
goto end;
}
} else {
if (!BN_mod_exp_mont(x, A, t, p, ctx, NULL)) {
goto end;
}
if (BN_is_zero(x)) {
// special case: a == 0 (mod p)
BN_zero(ret);
err = 0;
goto end;
}
}
// b := a*x^2 (= a^q)
if (!BN_mod_sqr(b, x, p, ctx) ||
!BN_mod_mul(b, b, A, p, ctx)) {
goto end;
}
// x := a*x (= a^((q+1)/2))
if (!BN_mod_mul(x, x, A, p, ctx)) {
goto end;
}
while (1) {
// Now b is a^q * y^k for some even k (0 <= k < 2^E
// where E refers to the original value of e, which we
// don't keep in a variable), and x is a^((q+1)/2) * y^(k/2).
//
// We have a*b = x^2,
// y^2^(e-1) = -1,
// b^2^(e-1) = 1.
if (BN_is_one(b)) {
if (!BN_copy(ret, x)) {
goto end;
}
err = 0;
goto vrfy;
}
// find smallest i such that b^(2^i) = 1
i = 1;
if (!BN_mod_sqr(t, b, p, ctx)) {
goto end;
}
while (!BN_is_one(t)) {
i++;
if (i == e) {
OPENSSL_PUT_ERROR(BN, BN_R_NOT_A_SQUARE);
goto end;
}
if (!BN_mod_mul(t, t, t, p, ctx)) {
goto end;
}
}
// t := y^2^(e - i - 1)
if (!BN_copy(t, y)) {
goto end;
}
for (j = e - i - 1; j > 0; j--) {
if (!BN_mod_sqr(t, t, p, ctx)) {
goto end;
}
}
if (!BN_mod_mul(y, t, t, p, ctx) ||
!BN_mod_mul(x, x, t, p, ctx) ||
!BN_mod_mul(b, b, y, p, ctx)) {
goto end;
}
e = i;
}
vrfy:
if (!err) {
// verify the result -- the input might have been not a square
// (test added in 0.9.8)
if (!BN_mod_sqr(x, ret, p, ctx)) {
err = 1;
}
if (!err && 0 != BN_cmp(x, A)) {
OPENSSL_PUT_ERROR(BN, BN_R_NOT_A_SQUARE);
err = 1;
}
}
end:
if (err) {
if (ret != in) {
BN_clear_free(ret);
}
ret = NULL;
}
BN_CTX_end(ctx);
return ret;
}
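/*
 * Usage sketch for BN_mod_sqrt: p = 23 is congruent to 3 mod 4, so the easy
 * exponentiation path above applies and sqrt(4) mod 23 is 2 (or the other
 * root, 21). Illustrative only; error handling abbreviated:
 */
#include <CBigNumBoringSSL_bn.h>

static void bn_mod_sqrt_usage_sketch(void) {
  BN_CTX *ctx = BN_CTX_new();
  BIGNUM *p = BN_new(), *a = BN_new();
  BN_set_word(p, 23);
  BN_set_word(a, 4);
  BIGNUM *r = BN_mod_sqrt(NULL, a, p, ctx);  // passing NULL allocates the result
  if (r != NULL) {
    BN_free(r);
  }
  BN_free(a);
  BN_free(p);
  BN_CTX_free(ctx);
}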
int BN_sqrt(BIGNUM *out_sqrt, const BIGNUM *in, BN_CTX *ctx) {
BIGNUM *estimate, *tmp, *delta, *last_delta, *tmp2;
int ok = 0, last_delta_valid = 0;
if (in->neg) {
OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
return 0;
}
if (BN_is_zero(in)) {
BN_zero(out_sqrt);
return 1;
}
BN_CTX_start(ctx);
if (out_sqrt == in) {
estimate = BN_CTX_get(ctx);
} else {
estimate = out_sqrt;
}
tmp = BN_CTX_get(ctx);
last_delta = BN_CTX_get(ctx);
delta = BN_CTX_get(ctx);
if (estimate == NULL || tmp == NULL || last_delta == NULL || delta == NULL) {
OPENSSL_PUT_ERROR(BN, ERR_R_MALLOC_FAILURE);
goto err;
}
// We estimate that the square root of an n-bit number is 2^{n/2}.
if (!BN_lshift(estimate, BN_value_one(), BN_num_bits(in)/2)) {
goto err;
}
// This is Newton's method for finding a root of the equation |estimate|^2 -
// |in| = 0.
for (;;) {
// |estimate| = 1/2 * (|estimate| + |in|/|estimate|)
if (!BN_div(tmp, NULL, in, estimate, ctx) ||
!BN_add(tmp, tmp, estimate) ||
!BN_rshift1(estimate, tmp) ||
// |tmp| = |estimate|^2
!BN_sqr(tmp, estimate, ctx) ||
// |delta| = |in| - |tmp|
!BN_sub(delta, in, tmp)) {
OPENSSL_PUT_ERROR(BN, ERR_R_BN_LIB);
goto err;
}
delta->neg = 0;
// The difference between |in| and |estimate| squared is required to always
// decrease. This ensures that the loop always terminates, but I don't have
// a proof that it always finds the square root for a given square.
if (last_delta_valid && BN_cmp(delta, last_delta) >= 0) {
break;
}
last_delta_valid = 1;
tmp2 = last_delta;
last_delta = delta;
delta = tmp2;
}
if (BN_cmp(tmp, in) != 0) {
OPENSSL_PUT_ERROR(BN, BN_R_NOT_A_SQUARE);
goto err;
}
ok = 1;
err:
if (ok && out_sqrt == in && !BN_copy(out_sqrt, estimate)) {
ok = 0;
}
BN_CTX_end(ctx);
return ok;
}
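/*
 * The same Newton iteration at word scale: starting from an estimate at or
 * above the true root, the estimate decreases monotonically to
 * floor(sqrt(n)). A standalone 64-bit sketch (illustrative only; the BIGNUM
 * version above additionally verifies the input was a perfect square):
 */
#include <stdint.h>

static uint64_t isqrt64_sketch(uint64_t n) {
  if (n < 2) {
    return n;
  }
  uint64_t x = n;
  uint64_t y = (x + n / x) / 2;
  while (y < x) {  // the estimate strictly decreases until it converges
    x = y;
    y = (x + n / x) / 2;
  }
  return x;  // floor(sqrt(n))
}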

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -0,0 +1,620 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#include <CBigNumBoringSSL_cipher.h>
#include <assert.h>
#include <string.h>
#include <CBigNumBoringSSL_err.h>
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_nid.h>
#include "internal.h"
#include "../../internal.h"
void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *ctx) {
OPENSSL_memset(ctx, 0, sizeof(EVP_CIPHER_CTX));
}
EVP_CIPHER_CTX *EVP_CIPHER_CTX_new(void) {
EVP_CIPHER_CTX *ctx = OPENSSL_malloc(sizeof(EVP_CIPHER_CTX));
if (ctx) {
EVP_CIPHER_CTX_init(ctx);
}
return ctx;
}
int EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *c) {
if (c->cipher != NULL && c->cipher->cleanup) {
c->cipher->cleanup(c);
}
OPENSSL_free(c->cipher_data);
OPENSSL_memset(c, 0, sizeof(EVP_CIPHER_CTX));
return 1;
}
void EVP_CIPHER_CTX_free(EVP_CIPHER_CTX *ctx) {
if (ctx) {
EVP_CIPHER_CTX_cleanup(ctx);
OPENSSL_free(ctx);
}
}
int EVP_CIPHER_CTX_copy(EVP_CIPHER_CTX *out, const EVP_CIPHER_CTX *in) {
if (in == NULL || in->cipher == NULL) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_INPUT_NOT_INITIALIZED);
return 0;
}
EVP_CIPHER_CTX_cleanup(out);
OPENSSL_memcpy(out, in, sizeof(EVP_CIPHER_CTX));
if (in->cipher_data && in->cipher->ctx_size) {
out->cipher_data = OPENSSL_malloc(in->cipher->ctx_size);
if (!out->cipher_data) {
out->cipher = NULL;
OPENSSL_PUT_ERROR(CIPHER, ERR_R_MALLOC_FAILURE);
return 0;
}
OPENSSL_memcpy(out->cipher_data, in->cipher_data, in->cipher->ctx_size);
}
if (in->cipher->flags & EVP_CIPH_CUSTOM_COPY) {
if (!in->cipher->ctrl((EVP_CIPHER_CTX *)in, EVP_CTRL_COPY, 0, out)) {
out->cipher = NULL;
return 0;
}
}
return 1;
}
int EVP_CIPHER_CTX_reset(EVP_CIPHER_CTX *ctx) {
EVP_CIPHER_CTX_cleanup(ctx);
EVP_CIPHER_CTX_init(ctx);
return 1;
}
int EVP_CipherInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
ENGINE *engine, const uint8_t *key, const uint8_t *iv,
int enc) {
if (enc == -1) {
enc = ctx->encrypt;
} else {
if (enc) {
enc = 1;
}
ctx->encrypt = enc;
}
if (cipher) {
// Ensure a context left from last time is cleared (the previous check
// attempted to avoid this if the same ENGINE and EVP_CIPHER could be
// used).
if (ctx->cipher) {
EVP_CIPHER_CTX_cleanup(ctx);
// Restore encrypt and flags
ctx->encrypt = enc;
}
ctx->cipher = cipher;
if (ctx->cipher->ctx_size) {
ctx->cipher_data = OPENSSL_malloc(ctx->cipher->ctx_size);
if (!ctx->cipher_data) {
ctx->cipher = NULL;
OPENSSL_PUT_ERROR(CIPHER, ERR_R_MALLOC_FAILURE);
return 0;
}
} else {
ctx->cipher_data = NULL;
}
ctx->key_len = cipher->key_len;
ctx->flags = 0;
if (ctx->cipher->flags & EVP_CIPH_CTRL_INIT) {
if (!EVP_CIPHER_CTX_ctrl(ctx, EVP_CTRL_INIT, 0, NULL)) {
ctx->cipher = NULL;
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_INITIALIZATION_ERROR);
return 0;
}
}
} else if (!ctx->cipher) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_NO_CIPHER_SET);
return 0;
}
// we assume block size is a power of 2 in *cryptUpdate
assert(ctx->cipher->block_size == 1 || ctx->cipher->block_size == 8 ||
ctx->cipher->block_size == 16);
if (!(EVP_CIPHER_CTX_flags(ctx) & EVP_CIPH_CUSTOM_IV)) {
switch (EVP_CIPHER_CTX_mode(ctx)) {
case EVP_CIPH_STREAM_CIPHER:
case EVP_CIPH_ECB_MODE:
break;
case EVP_CIPH_CFB_MODE:
ctx->num = 0;
OPENSSL_FALLTHROUGH;
case EVP_CIPH_CBC_MODE:
assert(EVP_CIPHER_CTX_iv_length(ctx) <= sizeof(ctx->iv));
if (iv) {
OPENSSL_memcpy(ctx->oiv, iv, EVP_CIPHER_CTX_iv_length(ctx));
}
OPENSSL_memcpy(ctx->iv, ctx->oiv, EVP_CIPHER_CTX_iv_length(ctx));
break;
case EVP_CIPH_CTR_MODE:
case EVP_CIPH_OFB_MODE:
ctx->num = 0;
// Don't reuse IV for CTR mode
if (iv) {
OPENSSL_memcpy(ctx->iv, iv, EVP_CIPHER_CTX_iv_length(ctx));
}
break;
default:
return 0;
}
}
if (key || (ctx->cipher->flags & EVP_CIPH_ALWAYS_CALL_INIT)) {
if (!ctx->cipher->init(ctx, key, iv, enc)) {
return 0;
}
}
ctx->buf_len = 0;
ctx->final_used = 0;
ctx->block_mask = ctx->cipher->block_size - 1;
return 1;
}
int EVP_EncryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
ENGINE *impl, const uint8_t *key, const uint8_t *iv) {
return EVP_CipherInit_ex(ctx, cipher, impl, key, iv, 1);
}
int EVP_DecryptInit_ex(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
ENGINE *impl, const uint8_t *key, const uint8_t *iv) {
return EVP_CipherInit_ex(ctx, cipher, impl, key, iv, 0);
}
int EVP_EncryptUpdate(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len,
const uint8_t *in, int in_len) {
int i, j, bl;
if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) {
i = ctx->cipher->cipher(ctx, out, in, in_len);
if (i < 0) {
return 0;
} else {
*out_len = i;
}
return 1;
}
if (in_len <= 0) {
*out_len = 0;
return in_len == 0;
}
if (ctx->buf_len == 0 && (in_len & ctx->block_mask) == 0) {
if (ctx->cipher->cipher(ctx, out, in, in_len)) {
*out_len = in_len;
return 1;
} else {
*out_len = 0;
return 0;
}
}
i = ctx->buf_len;
bl = ctx->cipher->block_size;
assert(bl <= (int)sizeof(ctx->buf));
if (i != 0) {
if (bl - i > in_len) {
OPENSSL_memcpy(&ctx->buf[i], in, in_len);
ctx->buf_len += in_len;
*out_len = 0;
return 1;
} else {
j = bl - i;
OPENSSL_memcpy(&ctx->buf[i], in, j);
if (!ctx->cipher->cipher(ctx, out, ctx->buf, bl)) {
return 0;
}
in_len -= j;
in += j;
out += bl;
*out_len = bl;
}
} else {
*out_len = 0;
}
i = in_len & ctx->block_mask;
in_len -= i;
if (in_len > 0) {
if (!ctx->cipher->cipher(ctx, out, in, in_len)) {
return 0;
}
*out_len += in_len;
}
if (i != 0) {
OPENSSL_memcpy(ctx->buf, &in[in_len], i);
}
ctx->buf_len = i;
return 1;
}
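// Worked example of the split above, with illustrative numbers: block size 16,
// ctx->buf_len == 5 and in_len == 40 on entry. First j = 16 - 5 = 11 input
// bytes complete the buffered block (16 bytes out). Of the remaining 29 bytes,
// 29 & 15 = 13 are stashed in ctx->buf for the next call and 29 - 13 = 16 are
// encrypted directly (16 more bytes out). The call therefore writes 32 bytes
// and leaves ctx->buf_len == 13.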
int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len) {
int n, ret;
unsigned int i, b, bl;
if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) {
ret = ctx->cipher->cipher(ctx, out, NULL, 0);
if (ret < 0) {
return 0;
} else {
*out_len = ret;
}
return 1;
}
b = ctx->cipher->block_size;
assert(b <= sizeof(ctx->buf));
if (b == 1) {
*out_len = 0;
return 1;
}
bl = ctx->buf_len;
if (ctx->flags & EVP_CIPH_NO_PADDING) {
if (bl) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_DATA_NOT_MULTIPLE_OF_BLOCK_LENGTH);
return 0;
}
*out_len = 0;
return 1;
}
n = b - bl;
for (i = bl; i < b; i++) {
ctx->buf[i] = n;
}
ret = ctx->cipher->cipher(ctx, out, ctx->buf, b);
if (ret) {
*out_len = b;
}
return ret;
}
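// The fill loop above is PKCS#7-style padding: every pad byte equals the pad
// length, and a message that is already block-aligned gets a full block of
// padding. An illustrative helper (not a BoringSSL API) applying the same rule
// to a plain buffer:
static size_t pkcs7_pad(uint8_t *buf, size_t len, size_t block_size) {
  // |buf| must have room for |len| rounded up to a multiple of |block_size|;
  // |block_size| must be between 1 and 255.
  size_t pad = block_size - (len % block_size);  // always 1..block_size
  OPENSSL_memset(buf + len, (uint8_t)pad, pad);
  return len + pad;
}
// For a 13-byte message and 16-byte blocks this appends three 0x03 bytes; for
// a 16-byte message it appends a full block of 0x10 bytes.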
int EVP_DecryptUpdate(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len,
const uint8_t *in, int in_len) {
int fix_len;
unsigned int b;
if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) {
int r = ctx->cipher->cipher(ctx, out, in, in_len);
if (r < 0) {
*out_len = 0;
return 0;
} else {
*out_len = r;
}
return 1;
}
if (in_len <= 0) {
*out_len = 0;
return in_len == 0;
}
if (ctx->flags & EVP_CIPH_NO_PADDING) {
return EVP_EncryptUpdate(ctx, out, out_len, in, in_len);
}
b = ctx->cipher->block_size;
assert(b <= sizeof(ctx->final));
if (ctx->final_used) {
OPENSSL_memcpy(out, ctx->final, b);
out += b;
fix_len = 1;
} else {
fix_len = 0;
}
if (!EVP_EncryptUpdate(ctx, out, out_len, in, in_len)) {
return 0;
}
// if we have 'decrypted' a multiple of block size, make sure
// we have a copy of this last block
if (b > 1 && !ctx->buf_len) {
*out_len -= b;
ctx->final_used = 1;
OPENSSL_memcpy(ctx->final, &out[*out_len], b);
} else {
ctx->final_used = 0;
}
if (fix_len) {
*out_len += b;
}
return 1;
}
int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *out_len) {
int i, n;
unsigned int b;
*out_len = 0;
if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) {
i = ctx->cipher->cipher(ctx, out, NULL, 0);
if (i < 0) {
return 0;
} else {
*out_len = i;
}
return 1;
}
b = ctx->cipher->block_size;
if (ctx->flags & EVP_CIPH_NO_PADDING) {
if (ctx->buf_len) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_DATA_NOT_MULTIPLE_OF_BLOCK_LENGTH);
return 0;
}
*out_len = 0;
return 1;
}
if (b > 1) {
if (ctx->buf_len || !ctx->final_used) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_WRONG_FINAL_BLOCK_LENGTH);
return 0;
}
assert(b <= sizeof(ctx->final));
// The following assumes that the ciphertext has been authenticated.
// Otherwise it provides a padding oracle.
n = ctx->final[b - 1];
if (n == 0 || n > (int)b) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
return 0;
}
for (i = 0; i < n; i++) {
if (ctx->final[--b] != n) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
return 0;
}
}
n = ctx->cipher->block_size - n;
for (i = 0; i < n; i++) {
out[i] = ctx->final[i];
}
*out_len = n;
} else {
*out_len = 0;
}
return 1;
}
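// Worked example of the padding check above, with illustrative values: for a
// 16-byte block whose final byte is 0x03, the last three bytes must all equal
// 0x03, and the first 16 - 3 = 13 bytes are returned as plaintext. A final
// byte of 0x00, or any value above 0x10, is rejected as a bad decrypt.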
int EVP_Cipher(EVP_CIPHER_CTX *ctx, uint8_t *out, const uint8_t *in,
size_t in_len) {
return ctx->cipher->cipher(ctx, out, in, in_len);
}
int EVP_CipherUpdate(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len,
const uint8_t *in, int in_len) {
if (ctx->encrypt) {
return EVP_EncryptUpdate(ctx, out, out_len, in, in_len);
} else {
return EVP_DecryptUpdate(ctx, out, out_len, in, in_len);
}
}
int EVP_CipherFinal_ex(EVP_CIPHER_CTX *ctx, uint8_t *out, int *out_len) {
if (ctx->encrypt) {
return EVP_EncryptFinal_ex(ctx, out, out_len);
} else {
return EVP_DecryptFinal_ex(ctx, out, out_len);
}
}
const EVP_CIPHER *EVP_CIPHER_CTX_cipher(const EVP_CIPHER_CTX *ctx) {
return ctx->cipher;
}
int EVP_CIPHER_CTX_nid(const EVP_CIPHER_CTX *ctx) {
return ctx->cipher->nid;
}
int EVP_CIPHER_CTX_encrypting(const EVP_CIPHER_CTX *ctx) {
return ctx->encrypt;
}
unsigned EVP_CIPHER_CTX_block_size(const EVP_CIPHER_CTX *ctx) {
return ctx->cipher->block_size;
}
unsigned EVP_CIPHER_CTX_key_length(const EVP_CIPHER_CTX *ctx) {
return ctx->key_len;
}
unsigned EVP_CIPHER_CTX_iv_length(const EVP_CIPHER_CTX *ctx) {
return ctx->cipher->iv_len;
}
void *EVP_CIPHER_CTX_get_app_data(const EVP_CIPHER_CTX *ctx) {
return ctx->app_data;
}
void EVP_CIPHER_CTX_set_app_data(EVP_CIPHER_CTX *ctx, void *data) {
ctx->app_data = data;
}
uint32_t EVP_CIPHER_CTX_flags(const EVP_CIPHER_CTX *ctx) {
return ctx->cipher->flags & ~EVP_CIPH_MODE_MASK;
}
uint32_t EVP_CIPHER_CTX_mode(const EVP_CIPHER_CTX *ctx) {
return ctx->cipher->flags & EVP_CIPH_MODE_MASK;
}
int EVP_CIPHER_CTX_ctrl(EVP_CIPHER_CTX *ctx, int command, int arg, void *ptr) {
int ret;
if (!ctx->cipher) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_NO_CIPHER_SET);
return 0;
}
if (!ctx->cipher->ctrl) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_CTRL_NOT_IMPLEMENTED);
return 0;
}
ret = ctx->cipher->ctrl(ctx, command, arg, ptr);
if (ret == -1) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_CTRL_OPERATION_NOT_IMPLEMENTED);
return 0;
}
return ret;
}
int EVP_CIPHER_CTX_set_padding(EVP_CIPHER_CTX *ctx, int pad) {
if (pad) {
ctx->flags &= ~EVP_CIPH_NO_PADDING;
} else {
ctx->flags |= EVP_CIPH_NO_PADDING;
}
return 1;
}
int EVP_CIPHER_CTX_set_key_length(EVP_CIPHER_CTX *c, unsigned key_len) {
if (c->key_len == key_len) {
return 1;
}
if (key_len == 0 || !(c->cipher->flags & EVP_CIPH_VARIABLE_LENGTH)) {
OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_INVALID_KEY_LENGTH);
return 0;
}
c->key_len = key_len;
return 1;
}
int EVP_CIPHER_nid(const EVP_CIPHER *cipher) { return cipher->nid; }
unsigned EVP_CIPHER_block_size(const EVP_CIPHER *cipher) {
return cipher->block_size;
}
unsigned EVP_CIPHER_key_length(const EVP_CIPHER *cipher) {
return cipher->key_len;
}
unsigned EVP_CIPHER_iv_length(const EVP_CIPHER *cipher) {
return cipher->iv_len;
}
uint32_t EVP_CIPHER_flags(const EVP_CIPHER *cipher) {
return cipher->flags & ~EVP_CIPH_MODE_MASK;
}
uint32_t EVP_CIPHER_mode(const EVP_CIPHER *cipher) {
return cipher->flags & EVP_CIPH_MODE_MASK;
}
int EVP_CipherInit(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
const uint8_t *key, const uint8_t *iv, int enc) {
if (cipher) {
EVP_CIPHER_CTX_init(ctx);
}
return EVP_CipherInit_ex(ctx, cipher, NULL, key, iv, enc);
}
int EVP_EncryptInit(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
const uint8_t *key, const uint8_t *iv) {
return EVP_CipherInit(ctx, cipher, key, iv, 1);
}
int EVP_DecryptInit(EVP_CIPHER_CTX *ctx, const EVP_CIPHER *cipher,
const uint8_t *key, const uint8_t *iv) {
return EVP_CipherInit(ctx, cipher, key, iv, 0);
}
int EVP_add_cipher_alias(const char *a, const char *b) {
return 1;
}
void EVP_CIPHER_CTX_set_flags(const EVP_CIPHER_CTX *ctx, uint32_t flags) {}
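Putting the pieces together, the usual calling pattern for this API is init / update / final against a heap-allocated context. A minimal sketch follows, assuming an AES-128-CBC EVP_CIPHER such as EVP_aes_128_cbc() is available in this vendored build and that |out| has room for the plaintext plus one block of padding; the function name is illustrative.

#include <stdint.h>
#include <CBigNumBoringSSL_cipher.h>

// Illustrative one-shot CBC encryption using the functions defined above.
static int encrypt_cbc_example(const uint8_t key[16], const uint8_t iv[16],
                               const uint8_t *in, int in_len,
                               uint8_t *out, int *out_len) {
  EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
  int len = 0, total = 0, ok = 0;
  if (ctx == NULL) {
    return 0;
  }
  if (EVP_EncryptInit_ex(ctx, EVP_aes_128_cbc(), NULL, key, iv) &&
      EVP_EncryptUpdate(ctx, out, &len, in, in_len)) {
    total = len;
    if (EVP_EncryptFinal_ex(ctx, out + total, &len)) {
      total += len;
      *out_len = total;
      ok = 1;
    }
  }
  EVP_CIPHER_CTX_free(ctx);
  return ok;
}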

File diff suppressed because it is too large

View File

@ -0,0 +1,128 @@
/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
* All rights reserved.
*
* This package is an SSL implementation written
* by Eric Young (eay@cryptsoft.com).
* The implementation was written so as to conform with Netscapes SSL.
*
* This library is free for commercial and non-commercial use as long as
* the following conditions are aheared to. The following conditions
* apply to all code found in this distribution, be it the RC4, RSA,
* lhash, DES, etc., code; not just the SSL code. The SSL documentation
* included with this distribution is covered by the same copyright terms
* except that the holder is Tim Hudson (tjh@cryptsoft.com).
*
* Copyright remains Eric Young's, and as such any Copyright notices in
* the code are not to be removed.
* If this package is used in a product, Eric Young should be given attribution
* as the author of the parts of the library used.
* This can be in the form of a textual message at program startup or
* in documentation (online or textual) provided with the package.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* "This product includes cryptographic software written by
* Eric Young (eay@cryptsoft.com)"
* The word 'cryptographic' can be left out if the rouines from the library
* being used are not cryptographic related :-).
* 4. If you include any Windows specific code (or a derivative thereof) from
* the apps directory (application code) you must include an acknowledgement:
* "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
*
* THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* The licence and distribution terms for any publically available version or
* derivative of this code cannot be changed. i.e. this code cannot simply be
* copied and put under another distribution licence
* [including the GNU Public Licence.] */
#ifndef OPENSSL_HEADER_CIPHER_INTERNAL_H
#define OPENSSL_HEADER_CIPHER_INTERNAL_H
#include <CBigNumBoringSSL_base.h>
#include <CBigNumBoringSSL_aead.h>
#include <CBigNumBoringSSL_aes.h>
#include "../../internal.h"
#include "../modes/internal.h"
#if defined(__cplusplus)
extern "C" {
#endif
// EVP_CIPH_MODE_MASK contains the bits of |flags| that represent the mode.
#define EVP_CIPH_MODE_MASK 0x3f
// EVP_AEAD represents a specific AEAD algorithm.
struct evp_aead_st {
uint8_t key_len;
uint8_t nonce_len;
uint8_t overhead;
uint8_t max_tag_len;
int seal_scatter_supports_extra_in;
// init initialises an |EVP_AEAD_CTX|. If this call returns zero then
// |cleanup| will not be called for that context.
int (*init)(EVP_AEAD_CTX *, const uint8_t *key, size_t key_len,
size_t tag_len);
int (*init_with_direction)(EVP_AEAD_CTX *, const uint8_t *key, size_t key_len,
size_t tag_len, enum evp_aead_direction_t dir);
void (*cleanup)(EVP_AEAD_CTX *);
int (*open)(const EVP_AEAD_CTX *ctx, uint8_t *out, size_t *out_len,
size_t max_out_len, const uint8_t *nonce, size_t nonce_len,
const uint8_t *in, size_t in_len, const uint8_t *ad,
size_t ad_len);
int (*seal_scatter)(const EVP_AEAD_CTX *ctx, uint8_t *out, uint8_t *out_tag,
size_t *out_tag_len, size_t max_out_tag_len,
const uint8_t *nonce, size_t nonce_len, const uint8_t *in,
size_t in_len, const uint8_t *extra_in,
size_t extra_in_len, const uint8_t *ad, size_t ad_len);
int (*open_gather)(const EVP_AEAD_CTX *ctx, uint8_t *out,
const uint8_t *nonce, size_t nonce_len, const uint8_t *in,
size_t in_len, const uint8_t *in_tag, size_t in_tag_len,
const uint8_t *ad, size_t ad_len);
int (*get_iv)(const EVP_AEAD_CTX *ctx, const uint8_t **out_iv,
size_t *out_len);
  size_t (*tag_len)(const EVP_AEAD_CTX *ctx, size_t in_len,
size_t extra_in_len);
};
// aes_ctr_set_key initialises |*aes_key| using |key_bytes| bytes from |key|,
// where |key_bytes| must either be 16, 24 or 32. If not NULL, |*out_block| is
// set to a function that encrypts single blocks. If not NULL, |*gcm_key| is
// initialised to do GHASH with the given key. It returns a function for
// optimised CTR-mode, or NULL if CTR-mode should be built using |*out_block|.
ctr128_f aes_ctr_set_key(AES_KEY *aes_key, GCM128_KEY *gcm_key,
block128_f *out_block, const uint8_t *key,
size_t key_bytes);
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_CIPHER_INTERNAL_H

File diff suppressed because it is too large

View File

@ -0,0 +1,89 @@
/* Copyright (c) 2017, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#ifndef OPENSSL_HEADER_FIPSMODULE_DELOCATE_H
#define OPENSSL_HEADER_FIPSMODULE_DELOCATE_H
#include <CBigNumBoringSSL_base.h>
#include "../internal.h"
#if !defined(BORINGSSL_SHARED_LIBRARY) && defined(BORINGSSL_FIPS) && \
!defined(OPENSSL_ASAN) && !defined(OPENSSL_MSAN)
#define DEFINE_BSS_GET(type, name) \
static type name __attribute__((used)); \
type *name##_bss_get(void) __attribute__((const));
// For FIPS builds we require that CRYPTO_ONCE_INIT be zero.
#define DEFINE_STATIC_ONCE(name) DEFINE_BSS_GET(CRYPTO_once_t, name)
// For FIPS builds we require that CRYPTO_STATIC_MUTEX_INIT be zero.
#define DEFINE_STATIC_MUTEX(name) \
DEFINE_BSS_GET(struct CRYPTO_STATIC_MUTEX, name)
// For FIPS builds we require that CRYPTO_EX_DATA_CLASS_INIT be zero.
#define DEFINE_STATIC_EX_DATA_CLASS(name) \
DEFINE_BSS_GET(CRYPTO_EX_DATA_CLASS, name)
#else
#define DEFINE_BSS_GET(type, name) \
static type name; \
static type *name##_bss_get(void) { return &name; }
#define DEFINE_STATIC_ONCE(name) \
static CRYPTO_once_t name = CRYPTO_ONCE_INIT; \
static CRYPTO_once_t *name##_bss_get(void) { return &name; }
#define DEFINE_STATIC_MUTEX(name) \
static struct CRYPTO_STATIC_MUTEX name = CRYPTO_STATIC_MUTEX_INIT; \
static struct CRYPTO_STATIC_MUTEX *name##_bss_get(void) { return &name; }
#define DEFINE_STATIC_EX_DATA_CLASS(name) \
static CRYPTO_EX_DATA_CLASS name = CRYPTO_EX_DATA_CLASS_INIT; \
static CRYPTO_EX_DATA_CLASS *name##_bss_get(void) { return &name; }
#endif
#define DEFINE_DATA(type, name, accessor_decorations) \
DEFINE_BSS_GET(type, name##_storage) \
DEFINE_STATIC_ONCE(name##_once) \
static void name##_do_init(type *out); \
static void name##_init(void) { name##_do_init(name##_storage_bss_get()); } \
accessor_decorations type *name(void) { \
CRYPTO_once(name##_once_bss_get(), name##_init); \
/* See http://c-faq.com/ansi/constmismatch.html for why the following \
* cast is needed. */ \
return (const type *)name##_storage_bss_get(); \
} \
static void name##_do_init(type *out)
// DEFINE_METHOD_FUNCTION defines a function named |name| which returns a
// method table of type const |type|*. In FIPS mode, to avoid rel.ro data, it
// is split into a CRYPTO_once_t-guarded initializer in the module and
// unhashed, non-module accessor functions to space reserved in the BSS. The
// method table is initialized by a caller-supplied function which takes a
// parameter named |out| of type |type|*. The caller should follow the macro
// invocation with the body of this function:
//
// DEFINE_METHOD_FUNCTION(EVP_MD, EVP_md4) {
// out->type = NID_md4;
// out->md_size = MD4_DIGEST_LENGTH;
// out->flags = 0;
// out->init = md4_init;
// out->update = md4_update;
// out->final = md4_final;
// out->block_size = 64;
// out->ctx_size = sizeof(MD4_CTX);
// }
//
// This mechanism does not use a static initializer because their execution
// order is undefined. See FIPS.md for more details.
#define DEFINE_METHOD_FUNCTION(type, name) DEFINE_DATA(type, name, const)
#define DEFINE_LOCAL_DATA(type, name) DEFINE_DATA(type, name, static const)
#endif // OPENSSL_HEADER_FIPSMODULE_DELOCATE_H
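DEFINE_LOCAL_DATA follows the same pattern as the DEFINE_METHOD_FUNCTION example in the comment above, but the accessor it generates is file-local. A hypothetical use is sketched below; the struct name and field values are invented purely for illustration, only the macro usage mirrors the header.

// Hypothetical consumer of DEFINE_LOCAL_DATA (not BoringSSL code).
struct example_params_st {
  unsigned field_bits;
  unsigned cofactor;
};

DEFINE_LOCAL_DATA(struct example_params_st, example_params) {
  out->field_bits = 256;
  out->cofactor = 1;
}

// Callers in the same file then write:
//   const struct example_params_st *p = example_params();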

View File

@ -0,0 +1,265 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__APPLE__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
@ instructions are in aesv8-armx.pl.)
.text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
#define ldrplb ldrbpl
#define ldrneb ldrbne
#endif
#if defined(__thumb2__)
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.globl _gcm_init_neon
.private_extern _gcm_init_neon
#ifdef __thumb2__
.thumb_func _gcm_init_neon
#endif
.align 4
_gcm_init_neon:
vld1.64 d7,[r1]! @ load H
vmov.i8 q8,#0xe1
vld1.64 d6,[r1]
vshl.i64 d17,#57
vshr.u64 d16,#63 @ t0=0xc2....01
vdup.8 q9,d7[7]
vshr.u64 d26,d6,#63
vshr.s8 q9,#7 @ broadcast carry bit
vshl.i64 q3,q3,#1
vand q8,q8,q9
vorr d7,d26 @ H<<<=1
veor q3,q3,q8 @ twisted H
vstmia r0,{q3}
bx lr @ bx lr
.globl _gcm_gmult_neon
.private_extern _gcm_gmult_neon
#ifdef __thumb2__
.thumb_func _gcm_gmult_neon
#endif
.align 4
_gcm_gmult_neon:
vld1.64 d7,[r0]! @ load Xi
vld1.64 d6,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
mov r3,#16
b Lgmult_neon
.globl _gcm_ghash_neon
.private_extern _gcm_ghash_neon
#ifdef __thumb2__
.thumb_func _gcm_ghash_neon
#endif
.align 4
_gcm_ghash_neon:
vld1.64 d1,[r0]! @ load Xi
vld1.64 d0,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
Loop_neon:
vld1.64 d7,[r2]! @ load inp
vld1.64 d6,[r2]!
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
veor q3,q0 @ inp^=Xi
Lgmult_neon:
vext.8 d16, d26, d26, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d0, d6, d6, #1 @ B1
vmull.p8 q0, d26, d0 @ E = A*B1
vext.8 d18, d26, d26, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d26, d22 @ G = A*B2
vext.8 d20, d26, d26, #3 @ A3
veor q8, q8, q0 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d0, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q0, d26, d0 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d26, d22 @ K = A*B4
veor q10, q10, q0 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q0, d26, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q0, q0, q8
veor q0, q0, q10
veor d6,d6,d7 @ Karatsuba pre-processing
vext.8 d16, d28, d28, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d2, d6, d6, #1 @ B1
vmull.p8 q1, d28, d2 @ E = A*B1
vext.8 d18, d28, d28, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d28, d22 @ G = A*B2
vext.8 d20, d28, d28, #3 @ A3
veor q8, q8, q1 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d2, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q1, d28, d2 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d28, d22 @ K = A*B4
veor q10, q10, q1 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q1, d28, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q1, q1, q8
veor q1, q1, q10
vext.8 d16, d27, d27, #1 @ A1
vmull.p8 q8, d16, d7 @ F = A1*B
vext.8 d4, d7, d7, #1 @ B1
vmull.p8 q2, d27, d4 @ E = A*B1
vext.8 d18, d27, d27, #2 @ A2
vmull.p8 q9, d18, d7 @ H = A2*B
vext.8 d22, d7, d7, #2 @ B2
vmull.p8 q11, d27, d22 @ G = A*B2
vext.8 d20, d27, d27, #3 @ A3
veor q8, q8, q2 @ L = E + F
vmull.p8 q10, d20, d7 @ J = A3*B
vext.8 d4, d7, d7, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q2, d27, d4 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d7, d7, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d27, d22 @ K = A*B4
veor q10, q10, q2 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q2, d27, d7 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q2, q2, q8
veor q2, q2, q10
veor q1,q1,q0 @ Karatsuba post-processing
veor q1,q1,q2
veor d1,d1,d2
veor d4,d4,d3 @ Xh|Xl - 256-bit result
@ equivalent of reduction_avx from ghash-x86_64.pl
vshl.i64 q9,q0,#57 @ 1st phase
vshl.i64 q10,q0,#62
veor q10,q10,q9 @
vshl.i64 q9,q0,#63
veor q10, q10, q9 @
veor d1,d1,d20 @
veor d4,d4,d21
vshr.u64 q10,q0,#1 @ 2nd phase
veor q2,q2,q0
veor q0,q0,q10 @
vshr.u64 q10,q10,#6
vshr.u64 q0,q0,#1 @
veor q0,q0,q2 @
veor q0,q0,q10 @
subs r3,#16
bne Loop_neon
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
sub r0,#16
vst1.64 d1,[r0]! @ write out Xi
vst1.64 d0,[r0]
bx lr @ bx lr
#endif
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM
#endif // defined(__arm__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
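This file and the three NEON ports that follow implement the same entry points: gcm_init_neon stores a "twisted" copy of the hash key H, and gcm_gmult_neon / gcm_ghash_neon multiply the running GHASH state by H in GF(2^128) using Karatsuba-split 8-bit polynomial multiplies. For reference, the field multiplication they accelerate can be written bit-at-a-time as in the GCM specification; the sketch below follows that textbook shift-and-reduce description and is not the code the assembly was generated from.

#include <stdint.h>
#include <string.h>

// Reference GF(2^128) multiply used by GHASH (NIST SP 800-38D, Algorithm 1).
// Bit 0 is the most significant bit of byte 0; the reduction polynomial
// x^128 + x^7 + x^2 + x + 1 appears as the 0xE1 constant loaded by the NEON
// code above. Illustrative only.
static void gf128_mul(uint8_t out[16], const uint8_t x[16], const uint8_t h[16]) {
  uint8_t z[16] = {0};
  uint8_t v[16];
  memcpy(v, h, 16);
  for (int i = 0; i < 128; i++) {
    if ((x[i / 8] >> (7 - (i % 8))) & 1) {
      for (int j = 0; j < 16; j++) {
        z[j] ^= v[j];  // z ^= v when bit i of x is set
      }
    }
    // v = v * "x" in the field: shift right one bit, reduce if a bit fell off.
    int carry = v[15] & 1;
    for (int j = 15; j > 0; j--) {
      v[j] = (uint8_t)((v[j] >> 1) | (v[j - 1] << 7));
    }
    v[0] >>= 1;
    if (carry) {
      v[0] ^= 0xE1;
    }
  }
  memcpy(out, z, 16);
}

// One GHASH step: Xi = (Xi ^ block) * H.
static void ghash_block(uint8_t Xi[16], const uint8_t H[16],
                        const uint8_t block[16]) {
  for (int j = 0; j < 16; j++) {
    Xi[j] ^= block[j];
  }
  gf128_mul(Xi, Xi, H);
}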

View File

@ -0,0 +1,262 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__linux__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors and does not use ARMv8 instructions. (ARMv8 PMULL
@ instructions are in aesv8-armx.pl.)
.arch armv7-a
.text
#if defined(__thumb2__) || defined(__clang__)
.syntax unified
#define ldrplb ldrbpl
#define ldrneb ldrbne
#endif
#if defined(__thumb2__)
.thumb
#else
.code 32
#endif
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon
.globl gcm_init_neon
.hidden gcm_init_neon
.type gcm_init_neon,%function
.align 4
gcm_init_neon:
vld1.64 d7,[r1]! @ load H
vmov.i8 q8,#0xe1
vld1.64 d6,[r1]
vshl.i64 d17,#57
vshr.u64 d16,#63 @ t0=0xc2....01
vdup.8 q9,d7[7]
vshr.u64 d26,d6,#63
vshr.s8 q9,#7 @ broadcast carry bit
vshl.i64 q3,q3,#1
vand q8,q8,q9
vorr d7,d26 @ H<<<=1
veor q3,q3,q8 @ twisted H
vstmia r0,{q3}
bx lr @ bx lr
.size gcm_init_neon,.-gcm_init_neon
.globl gcm_gmult_neon
.hidden gcm_gmult_neon
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
vld1.64 d7,[r0]! @ load Xi
vld1.64 d6,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
mov r3,#16
b .Lgmult_neon
.size gcm_gmult_neon,.-gcm_gmult_neon
.globl gcm_ghash_neon
.hidden gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
vld1.64 d1,[r0]! @ load Xi
vld1.64 d0,[r0]!
vmov.i64 d29,#0x0000ffffffffffff
vldmia r1,{d26,d27} @ load twisted H
vmov.i64 d30,#0x00000000ffffffff
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
vmov.i64 d31,#0x000000000000ffff
veor d28,d26,d27 @ Karatsuba pre-processing
.Loop_neon:
vld1.64 d7,[r2]! @ load inp
vld1.64 d6,[r2]!
#ifdef __ARMEL__
vrev64.8 q3,q3
#endif
veor q3,q0 @ inp^=Xi
.Lgmult_neon:
vext.8 d16, d26, d26, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d0, d6, d6, #1 @ B1
vmull.p8 q0, d26, d0 @ E = A*B1
vext.8 d18, d26, d26, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d26, d22 @ G = A*B2
vext.8 d20, d26, d26, #3 @ A3
veor q8, q8, q0 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d0, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q0, d26, d0 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d26, d22 @ K = A*B4
veor q10, q10, q0 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q0, d26, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q0, q0, q8
veor q0, q0, q10
veor d6,d6,d7 @ Karatsuba pre-processing
vext.8 d16, d28, d28, #1 @ A1
vmull.p8 q8, d16, d6 @ F = A1*B
vext.8 d2, d6, d6, #1 @ B1
vmull.p8 q1, d28, d2 @ E = A*B1
vext.8 d18, d28, d28, #2 @ A2
vmull.p8 q9, d18, d6 @ H = A2*B
vext.8 d22, d6, d6, #2 @ B2
vmull.p8 q11, d28, d22 @ G = A*B2
vext.8 d20, d28, d28, #3 @ A3
veor q8, q8, q1 @ L = E + F
vmull.p8 q10, d20, d6 @ J = A3*B
vext.8 d2, d6, d6, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q1, d28, d2 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d6, d6, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d28, d22 @ K = A*B4
veor q10, q10, q1 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q1, d28, d6 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q1, q1, q8
veor q1, q1, q10
vext.8 d16, d27, d27, #1 @ A1
vmull.p8 q8, d16, d7 @ F = A1*B
vext.8 d4, d7, d7, #1 @ B1
vmull.p8 q2, d27, d4 @ E = A*B1
vext.8 d18, d27, d27, #2 @ A2
vmull.p8 q9, d18, d7 @ H = A2*B
vext.8 d22, d7, d7, #2 @ B2
vmull.p8 q11, d27, d22 @ G = A*B2
vext.8 d20, d27, d27, #3 @ A3
veor q8, q8, q2 @ L = E + F
vmull.p8 q10, d20, d7 @ J = A3*B
vext.8 d4, d7, d7, #3 @ B3
veor q9, q9, q11 @ M = G + H
vmull.p8 q2, d27, d4 @ I = A*B3
veor d16, d16, d17 @ t0 = (L) (P0 + P1) << 8
vand d17, d17, d29
vext.8 d22, d7, d7, #4 @ B4
veor d18, d18, d19 @ t1 = (M) (P2 + P3) << 16
vand d19, d19, d30
vmull.p8 q11, d27, d22 @ K = A*B4
veor q10, q10, q2 @ N = I + J
veor d16, d16, d17
veor d18, d18, d19
veor d20, d20, d21 @ t2 = (N) (P4 + P5) << 24
vand d21, d21, d31
vext.8 q8, q8, q8, #15
veor d22, d22, d23 @ t3 = (K) (P6 + P7) << 32
vmov.i64 d23, #0
vext.8 q9, q9, q9, #14
veor d20, d20, d21
vmull.p8 q2, d27, d7 @ D = A*B
vext.8 q11, q11, q11, #12
vext.8 q10, q10, q10, #13
veor q8, q8, q9
veor q10, q10, q11
veor q2, q2, q8
veor q2, q2, q10
veor q1,q1,q0 @ Karatsuba post-processing
veor q1,q1,q2
veor d1,d1,d2
veor d4,d4,d3 @ Xh|Xl - 256-bit result
@ equivalent of reduction_avx from ghash-x86_64.pl
vshl.i64 q9,q0,#57 @ 1st phase
vshl.i64 q10,q0,#62
veor q10,q10,q9 @
vshl.i64 q9,q0,#63
veor q10, q10, q9 @
veor d1,d1,d20 @
veor d4,d4,d21
vshr.u64 q10,q0,#1 @ 2nd phase
veor q2,q2,q0
veor q0,q0,q10 @
vshr.u64 q10,q10,#6
vshr.u64 q0,q0,#1 @
veor q0,q0,q2 @
veor q0,q0,q10 @
subs r3,#16
bne .Loop_neon
#ifdef __ARMEL__
vrev64.8 q0,q0
#endif
sub r0,#16
vst1.64 d1,[r0]! @ write out Xi
vst1.64 d0,[r0]
bx lr @ bx lr
.size gcm_ghash_neon,.-gcm_ghash_neon
#endif
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__arm__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@ -0,0 +1,345 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__aarch64__) && defined(__APPLE__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _gcm_init_neon
.private_extern _gcm_init_neon
.align 4
_gcm_init_neon:
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
shl v19.2d, v19.2d, #57 // 0xc2.0
ext v3.16b, v17.16b, v17.16b, #8
ushr v18.2d, v19.2d, #63
dup v17.4s, v17.s[1]
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
ushr v18.2d, v3.2d, #63
sshr v17.4s, v17.4s, #31 // broadcast carry bit
and v18.16b, v18.16b, v16.16b
shl v3.2d, v3.2d, #1
ext v18.16b, v18.16b, v18.16b, #8
and v16.16b, v16.16b, v17.16b
orr v3.16b, v3.16b, v18.16b // H<<<=1
eor v5.16b, v3.16b, v16.16b // twisted H
st1 {v5.2d}, [x0] // store Htable[0]
ret
.globl _gcm_gmult_neon
.private_extern _gcm_gmult_neon
.align 4
_gcm_gmult_neon:
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, Lmasks@PAGE // load constants
add x9, x9, Lmasks@PAGEOFF
ld1 {v24.2d, v25.2d}, [x9]
rev64 v3.16b, v3.16b // byteswap Xi
ext v3.16b, v3.16b, v3.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
mov x3, #16
b Lgmult_neon
.globl _gcm_ghash_neon
.private_extern _gcm_ghash_neon
.align 4
_gcm_ghash_neon:
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, Lmasks@PAGE // load constants
add x9, x9, Lmasks@PAGEOFF
ld1 {v24.2d, v25.2d}, [x9]
rev64 v0.16b, v0.16b // byteswap Xi
ext v0.16b, v0.16b, v0.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
Loop_neon:
ld1 {v3.16b}, [x2], #16 // load inp
rev64 v3.16b, v3.16b // byteswap inp
ext v3.16b, v3.16b, v3.16b, #8
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
Lgmult_neon:
// Split the input into v3 and v4. (The upper halves are unused,
// so it is okay to leave them alone.)
ins v4.d[0], v3.d[1]
ext v16.8b, v5.8b, v5.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v0.8b, v3.8b, v3.8b, #1 // B1
pmull v0.8h, v5.8b, v0.8b // E = A*B1
ext v17.8b, v5.8b, v5.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v5.8b, v19.8b // G = A*B2
ext v18.8b, v5.8b, v5.8b, #3 // A3
eor v16.16b, v16.16b, v0.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v0.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v0.8h, v5.8b, v0.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v0.16b // N = I + J
pmull v19.8h, v5.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v0.8h, v5.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v0.16b, v0.16b, v16.16b
eor v0.16b, v0.16b, v18.16b
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
ext v16.8b, v7.8b, v7.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v1.8b, v3.8b, v3.8b, #1 // B1
pmull v1.8h, v7.8b, v1.8b // E = A*B1
ext v17.8b, v7.8b, v7.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v7.8b, v19.8b // G = A*B2
ext v18.8b, v7.8b, v7.8b, #3 // A3
eor v16.16b, v16.16b, v1.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v1.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v1.8h, v7.8b, v1.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v1.16b // N = I + J
pmull v19.8h, v7.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v1.8h, v7.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v1.16b, v1.16b, v16.16b
eor v1.16b, v1.16b, v18.16b
ext v16.8b, v6.8b, v6.8b, #1 // A1
pmull v16.8h, v16.8b, v4.8b // F = A1*B
ext v2.8b, v4.8b, v4.8b, #1 // B1
pmull v2.8h, v6.8b, v2.8b // E = A*B1
ext v17.8b, v6.8b, v6.8b, #2 // A2
pmull v17.8h, v17.8b, v4.8b // H = A2*B
ext v19.8b, v4.8b, v4.8b, #2 // B2
pmull v19.8h, v6.8b, v19.8b // G = A*B2
ext v18.8b, v6.8b, v6.8b, #3 // A3
eor v16.16b, v16.16b, v2.16b // L = E + F
pmull v18.8h, v18.8b, v4.8b // J = A3*B
ext v2.8b, v4.8b, v4.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v2.8h, v6.8b, v2.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v4.8b, v4.8b, #4 // B4
eor v18.16b, v18.16b, v2.16b // N = I + J
pmull v19.8h, v6.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v2.8h, v6.8b, v4.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v2.16b, v2.16b, v16.16b
eor v2.16b, v2.16b, v18.16b
ext v16.16b, v0.16b, v2.16b, #8
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
eor v1.16b, v1.16b, v2.16b
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
// This is a no-op due to the ins instruction below.
// ins v2.d[0], v1.d[1]
// equivalent of reduction_avx from ghash-x86_64.pl
shl v17.2d, v0.2d, #57 // 1st phase
shl v18.2d, v0.2d, #62
eor v18.16b, v18.16b, v17.16b //
shl v17.2d, v0.2d, #63
eor v18.16b, v18.16b, v17.16b //
// Note Xm contains {Xl.d[1], Xh.d[0]}.
eor v18.16b, v18.16b, v1.16b
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
ushr v18.2d, v0.2d, #1 // 2nd phase
eor v2.16b, v2.16b,v0.16b
eor v0.16b, v0.16b,v18.16b //
ushr v18.2d, v18.2d, #6
ushr v0.2d, v0.2d, #1 //
eor v0.16b, v0.16b, v2.16b //
eor v0.16b, v0.16b, v18.16b //
subs x3, x3, #16
bne Loop_neon
rev64 v0.16b, v0.16b // byteswap Xi and write
ext v0.16b, v0.16b, v0.16b, #8
st1 {v0.16b}, [x0]
ret
.section __TEXT,__const
.align 4
Lmasks:
.quad 0x0000ffffffffffff // k48
.quad 0x00000000ffffffff // k32
.quad 0x000000000000ffff // k16
.quad 0x0000000000000000 // k0
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM
#endif // defined(__aarch64__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@ -0,0 +1,348 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__aarch64__) && defined(__linux__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.globl gcm_init_neon
.hidden gcm_init_neon
.type gcm_init_neon,%function
.align 4
gcm_init_neon:
// This function is adapted from gcm_init_v8. xC2 is t3.
ld1 {v17.2d}, [x1] // load H
movi v19.16b, #0xe1
shl v19.2d, v19.2d, #57 // 0xc2.0
ext v3.16b, v17.16b, v17.16b, #8
ushr v18.2d, v19.2d, #63
dup v17.4s, v17.s[1]
ext v16.16b, v18.16b, v19.16b, #8 // t0=0xc2....01
ushr v18.2d, v3.2d, #63
sshr v17.4s, v17.4s, #31 // broadcast carry bit
and v18.16b, v18.16b, v16.16b
shl v3.2d, v3.2d, #1
ext v18.16b, v18.16b, v18.16b, #8
and v16.16b, v16.16b, v17.16b
orr v3.16b, v3.16b, v18.16b // H<<<=1
eor v5.16b, v3.16b, v16.16b // twisted H
st1 {v5.2d}, [x0] // store Htable[0]
ret
.size gcm_init_neon,.-gcm_init_neon
.globl gcm_gmult_neon
.hidden gcm_gmult_neon
.type gcm_gmult_neon,%function
.align 4
gcm_gmult_neon:
ld1 {v3.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, .Lmasks // load constants
add x9, x9, :lo12:.Lmasks
ld1 {v24.2d, v25.2d}, [x9]
rev64 v3.16b, v3.16b // byteswap Xi
ext v3.16b, v3.16b, v3.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
mov x3, #16
b .Lgmult_neon
.size gcm_gmult_neon,.-gcm_gmult_neon
.globl gcm_ghash_neon
.hidden gcm_ghash_neon
.type gcm_ghash_neon,%function
.align 4
gcm_ghash_neon:
ld1 {v0.16b}, [x0] // load Xi
ld1 {v5.1d}, [x1], #8 // load twisted H
ld1 {v6.1d}, [x1]
adrp x9, .Lmasks // load constants
add x9, x9, :lo12:.Lmasks
ld1 {v24.2d, v25.2d}, [x9]
rev64 v0.16b, v0.16b // byteswap Xi
ext v0.16b, v0.16b, v0.16b, #8
eor v7.8b, v5.8b, v6.8b // Karatsuba pre-processing
.Loop_neon:
ld1 {v3.16b}, [x2], #16 // load inp
rev64 v3.16b, v3.16b // byteswap inp
ext v3.16b, v3.16b, v3.16b, #8
eor v3.16b, v3.16b, v0.16b // inp ^= Xi
.Lgmult_neon:
// Split the input into v3 and v4. (The upper halves are unused,
// so it is okay to leave them alone.)
ins v4.d[0], v3.d[1]
ext v16.8b, v5.8b, v5.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v0.8b, v3.8b, v3.8b, #1 // B1
pmull v0.8h, v5.8b, v0.8b // E = A*B1
ext v17.8b, v5.8b, v5.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v5.8b, v19.8b // G = A*B2
ext v18.8b, v5.8b, v5.8b, #3 // A3
eor v16.16b, v16.16b, v0.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v0.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v0.8h, v5.8b, v0.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v0.16b // N = I + J
pmull v19.8h, v5.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v0.8h, v5.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v0.16b, v0.16b, v16.16b
eor v0.16b, v0.16b, v18.16b
eor v3.8b, v3.8b, v4.8b // Karatsuba pre-processing
ext v16.8b, v7.8b, v7.8b, #1 // A1
pmull v16.8h, v16.8b, v3.8b // F = A1*B
ext v1.8b, v3.8b, v3.8b, #1 // B1
pmull v1.8h, v7.8b, v1.8b // E = A*B1
ext v17.8b, v7.8b, v7.8b, #2 // A2
pmull v17.8h, v17.8b, v3.8b // H = A2*B
ext v19.8b, v3.8b, v3.8b, #2 // B2
pmull v19.8h, v7.8b, v19.8b // G = A*B2
ext v18.8b, v7.8b, v7.8b, #3 // A3
eor v16.16b, v16.16b, v1.16b // L = E + F
pmull v18.8h, v18.8b, v3.8b // J = A3*B
ext v1.8b, v3.8b, v3.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v1.8h, v7.8b, v1.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v3.8b, v3.8b, #4 // B4
eor v18.16b, v18.16b, v1.16b // N = I + J
pmull v19.8h, v7.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v1.8h, v7.8b, v3.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v1.16b, v1.16b, v16.16b
eor v1.16b, v1.16b, v18.16b
ext v16.8b, v6.8b, v6.8b, #1 // A1
pmull v16.8h, v16.8b, v4.8b // F = A1*B
ext v2.8b, v4.8b, v4.8b, #1 // B1
pmull v2.8h, v6.8b, v2.8b // E = A*B1
ext v17.8b, v6.8b, v6.8b, #2 // A2
pmull v17.8h, v17.8b, v4.8b // H = A2*B
ext v19.8b, v4.8b, v4.8b, #2 // B2
pmull v19.8h, v6.8b, v19.8b // G = A*B2
ext v18.8b, v6.8b, v6.8b, #3 // A3
eor v16.16b, v16.16b, v2.16b // L = E + F
pmull v18.8h, v18.8b, v4.8b // J = A3*B
ext v2.8b, v4.8b, v4.8b, #3 // B3
eor v17.16b, v17.16b, v19.16b // M = G + H
pmull v2.8h, v6.8b, v2.8b // I = A*B3
// Here we diverge from the 32-bit version. It computes the following
// (instructions reordered for clarity):
//
// veor $t0#lo, $t0#lo, $t0#hi @ t0 = P0 + P1 (L)
// vand $t0#hi, $t0#hi, $k48
// veor $t0#lo, $t0#lo, $t0#hi
//
// veor $t1#lo, $t1#lo, $t1#hi @ t1 = P2 + P3 (M)
// vand $t1#hi, $t1#hi, $k32
// veor $t1#lo, $t1#lo, $t1#hi
//
// veor $t2#lo, $t2#lo, $t2#hi @ t2 = P4 + P5 (N)
// vand $t2#hi, $t2#hi, $k16
// veor $t2#lo, $t2#lo, $t2#hi
//
// veor $t3#lo, $t3#lo, $t3#hi @ t3 = P6 + P7 (K)
// vmov.i64 $t3#hi, #0
//
// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
// upper halves of SIMD registers, so we must split each half into
// separate registers. To compensate, we pair computations up and
// parallelize.
ext v19.8b, v4.8b, v4.8b, #4 // B4
eor v18.16b, v18.16b, v2.16b // N = I + J
pmull v19.8h, v6.8b, v19.8b // K = A*B4
// This can probably be scheduled more efficiently. For now, we just
// pair up independent instructions.
zip1 v20.2d, v16.2d, v17.2d
zip1 v22.2d, v18.2d, v19.2d
zip2 v21.2d, v16.2d, v17.2d
zip2 v23.2d, v18.2d, v19.2d
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
and v21.16b, v21.16b, v24.16b
and v23.16b, v23.16b, v25.16b
eor v20.16b, v20.16b, v21.16b
eor v22.16b, v22.16b, v23.16b
zip1 v16.2d, v20.2d, v21.2d
zip1 v18.2d, v22.2d, v23.2d
zip2 v17.2d, v20.2d, v21.2d
zip2 v19.2d, v22.2d, v23.2d
ext v16.16b, v16.16b, v16.16b, #15 // t0 = t0 << 8
ext v17.16b, v17.16b, v17.16b, #14 // t1 = t1 << 16
pmull v2.8h, v6.8b, v4.8b // D = A*B
ext v19.16b, v19.16b, v19.16b, #12 // t3 = t3 << 32
ext v18.16b, v18.16b, v18.16b, #13 // t2 = t2 << 24
eor v16.16b, v16.16b, v17.16b
eor v18.16b, v18.16b, v19.16b
eor v2.16b, v2.16b, v16.16b
eor v2.16b, v2.16b, v18.16b
ext v16.16b, v0.16b, v2.16b, #8
eor v1.16b, v1.16b, v0.16b // Karatsuba post-processing
eor v1.16b, v1.16b, v2.16b
eor v1.16b, v1.16b, v16.16b // Xm overlaps Xh.lo and Xl.hi
ins v0.d[1], v1.d[0] // Xh|Xl - 256-bit result
// This is a no-op due to the ins instruction below.
// ins v2.d[0], v1.d[1]
// equivalent of reduction_avx from ghash-x86_64.pl
shl v17.2d, v0.2d, #57 // 1st phase
shl v18.2d, v0.2d, #62
eor v18.16b, v18.16b, v17.16b //
shl v17.2d, v0.2d, #63
eor v18.16b, v18.16b, v17.16b //
// Note Xm contains {Xl.d[1], Xh.d[0]}.
eor v18.16b, v18.16b, v1.16b
ins v0.d[1], v18.d[0] // Xl.d[1] ^= t2.d[0]
ins v2.d[0], v18.d[1] // Xh.d[0] ^= t2.d[1]
ushr v18.2d, v0.2d, #1 // 2nd phase
eor v2.16b, v2.16b,v0.16b
eor v0.16b, v0.16b,v18.16b //
ushr v18.2d, v18.2d, #6
ushr v0.2d, v0.2d, #1 //
eor v0.16b, v0.16b, v2.16b //
eor v0.16b, v0.16b, v18.16b //
subs x3, x3, #16
bne .Loop_neon
rev64 v0.16b, v0.16b // byteswap Xi and write
ext v0.16b, v0.16b, v0.16b, #8
st1 {v0.16b}, [x0]
ret
.size gcm_ghash_neon,.-gcm_ghash_neon
.section .rodata
.align 4
.Lmasks:
.quad 0x0000ffffffffffff // k48
.quad 0x00000000ffffffff // k32
.quad 0x000000000000ffff // k16
.quad 0x0000000000000000 // k0
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__aarch64__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
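The comment block inside gcm_ghash_neon above describes how the routine assembles a wide carry-less product from eight 8-bit pmull partial products, masking the shifted pieces with the k48/k32/k16 constants from .Lmasks before folding them together. The end result is an ordinary carry-less (GF(2)[x]) multiplication. As a point of reference only, here is a minimal scalar sketch of a 64×64→128-bit carry-less multiply in Swift; this helper is illustrative and is not part of the vendored sources or the BigNum package.

/// Illustrative scalar carry-less multiply: XOR together shifted copies of `a`
/// for every set bit of `b`. One pmull lane computes the same thing in hardware.
func clmul64(_ a: UInt64, _ b: UInt64) -> (hi: UInt64, lo: UInt64) {
    var hi: UInt64 = 0
    var lo: UInt64 = 0
    for i in 0..<64 where (b >> UInt64(i)) & 1 == 1 {
        lo ^= a << UInt64(i)                    // add (XOR) the shifted multiplicand
        if i > 0 { hi ^= a >> UInt64(64 - i) }  // bits that spill into the high half
    }
    return (hi: hi, lo: lo)
}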


@@ -0,0 +1,301 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.globl gcm_gmult_ssse3
.hidden gcm_gmult_ssse3
.type gcm_gmult_ssse3,@function
.align 16
gcm_gmult_ssse3:
.L_gcm_gmult_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movdqu (%edi),%xmm0
call .L000pic_point
.L000pic_point:
popl %eax
movdqa .Lreverse_bytes-.L000pic_point(%eax),%xmm7
movdqa .Llow4_mask-.L000pic_point(%eax),%xmm2
.byte 102,15,56,0,199
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L001loop_row_1:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L001loop_row_1
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L002loop_row_2:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L002loop_row_2
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
.L003loop_row_3:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L003loop_row_3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,0,215
movdqu %xmm2,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_gmult_ssse3,.-.L_gcm_gmult_ssse3_begin
.globl gcm_ghash_ssse3
.hidden gcm_ghash_ssse3
.type gcm_ghash_ssse3,@function
.align 16
gcm_ghash_ssse3:
.L_gcm_ghash_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%edx
movl 32(%esp),%ecx
movdqu (%edi),%xmm0
call .L004pic_point
.L004pic_point:
popl %ebx
movdqa .Lreverse_bytes-.L004pic_point(%ebx),%xmm7
andl $-16,%ecx
.byte 102,15,56,0,199
pxor %xmm3,%xmm3
.L005loop_ghash:
movdqa .Llow4_mask-.L004pic_point(%ebx),%xmm2
movdqu (%edx),%xmm1
.byte 102,15,56,0,207
pxor %xmm1,%xmm0
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
movl $5,%eax
.L006loop_row_4:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L006loop_row_4
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $5,%eax
.L007loop_row_5:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L007loop_row_5
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movl $6,%eax
.L008loop_row_6:
movdqa (%esi),%xmm4
leal 16(%esi),%esi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subl $1,%eax
jnz .L008loop_row_6
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,%xmm0
leal -256(%esi),%esi
leal 16(%edx),%edx
subl $16,%ecx
jnz .L005loop_ghash
.byte 102,15,56,0,199
movdqu %xmm0,(%edi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_ssse3,.-.L_gcm_ghash_ssse3_begin
.align 16
.Lreverse_bytes:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.align 16
.Llow4_mask:
.long 252645135,252645135,252645135,252645135
#endif
.section .note.GNU-stack,"",@progbits
#endif // defined(__i386__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
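gcm_gmult_ssse3 and gcm_ghash_ssse3 above avoid data-dependent table lookups: each byte of the state is split into its low and high nibble using the .Llow4_mask constant (pand, pandn and psrld $4), and each nibble then selects one of 16 precomputed table bytes with pshufb, so the selection runs in constant time. A minimal Swift sketch of just the nibble split, using a hypothetical helper that is not in the vendored code:

/// Hypothetical helper, for illustration only: split each state byte into its
/// low and high 4-bit halves, mirroring the pand / pandn + psrld $4 sequence.
/// In the assembly each nibble then indexes a 16-byte table row via pshufb.
func splitNibbles(_ state: [UInt8]) -> (low: [UInt8], high: [UInt8]) {
    let mask: UInt8 = 0x0f                       // same value as .Llow4_mask
    let low  = state.map { $0 & mask }           // pand: keep the low 4 bits
    let high = state.map { ($0 & ~mask) >> 4 }   // pandn + psrld: high 4 bits, shifted down
    return (low: low, high: high)
}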


@@ -0,0 +1,434 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.type gcm_gmult_ssse3, @function
.globl gcm_gmult_ssse3
.hidden gcm_gmult_ssse3
.align 16
gcm_gmult_ssse3:
.cfi_startproc
.Lgmult_seh_begin:
movdqu (%rdi),%xmm0
movdqa .Lreverse_bytes(%rip),%xmm10
movdqa .Llow4_mask(%rip),%xmm2
.byte 102,65,15,56,0,194
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
.Loop_row_1:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_1
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
.Loop_row_2:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_2
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $6,%rax
.Loop_row_3:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
.byte 102,65,15,56,0,210
movdqu %xmm2,(%rdi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
.Lgmult_seh_end:
.cfi_endproc
.size gcm_gmult_ssse3,.-gcm_gmult_ssse3
.type gcm_ghash_ssse3, @function
.globl gcm_ghash_ssse3
.hidden gcm_ghash_ssse3
.align 16
gcm_ghash_ssse3:
.Lghash_seh_begin:
.cfi_startproc
movdqu (%rdi),%xmm0
movdqa .Lreverse_bytes(%rip),%xmm10
movdqa .Llow4_mask(%rip),%xmm11
andq $-16,%rcx
.byte 102,65,15,56,0,194
pxor %xmm3,%xmm3
.Loop_ghash:
movdqu (%rdx),%xmm1
.byte 102,65,15,56,0,202
pxor %xmm1,%xmm0
movdqa %xmm11,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm11,%xmm0
pxor %xmm2,%xmm2
movq $5,%rax
.Loop_row_4:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_4
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
.Loop_row_5:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_5
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $6,%rax
.Loop_row_6:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz .Loop_row_6
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,%xmm0
leaq -256(%rsi),%rsi
leaq 16(%rdx),%rdx
subq $16,%rcx
jnz .Loop_ghash
.byte 102,65,15,56,0,194
movdqu %xmm0,(%rdi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
.Lghash_seh_end:
.cfi_endproc
.size gcm_ghash_ssse3,.-gcm_ghash_ssse3
.align 16
.Lreverse_bytes:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.Llow4_mask:
.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
#endif
.section .note.GNU-stack,"",@progbits
#endif // defined(__x86_64__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif


@@ -0,0 +1,433 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.globl _gcm_gmult_ssse3
.private_extern _gcm_gmult_ssse3
.p2align 4
_gcm_gmult_ssse3:
L$gmult_seh_begin:
movdqu (%rdi),%xmm0
movdqa L$reverse_bytes(%rip),%xmm10
movdqa L$low4_mask(%rip),%xmm2
.byte 102,65,15,56,0,194
movdqa %xmm2,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm2,%xmm0
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
L$oop_row_1:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz L$oop_row_1
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
L$oop_row_2:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz L$oop_row_2
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $6,%rax
L$oop_row_3:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz L$oop_row_3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
.byte 102,65,15,56,0,210
movdqu %xmm2,(%rdi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
L$gmult_seh_end:
.globl _gcm_ghash_ssse3
.private_extern _gcm_ghash_ssse3
.p2align 4
_gcm_ghash_ssse3:
L$ghash_seh_begin:
movdqu (%rdi),%xmm0
movdqa L$reverse_bytes(%rip),%xmm10
movdqa L$low4_mask(%rip),%xmm11
andq $-16,%rcx
.byte 102,65,15,56,0,194
pxor %xmm3,%xmm3
L$oop_ghash:
movdqu (%rdx),%xmm1
.byte 102,65,15,56,0,202
pxor %xmm1,%xmm0
movdqa %xmm11,%xmm1
pandn %xmm0,%xmm1
psrld $4,%xmm1
pand %xmm11,%xmm0
pxor %xmm2,%xmm2
movq $5,%rax
L$oop_row_4:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz L$oop_row_4
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $5,%rax
L$oop_row_5:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz L$oop_row_5
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movq $6,%rax
L$oop_row_6:
movdqa (%rsi),%xmm4
leaq 16(%rsi),%rsi
movdqa %xmm2,%xmm6
.byte 102,15,58,15,243,1
movdqa %xmm6,%xmm3
psrldq $1,%xmm2
movdqa %xmm4,%xmm5
.byte 102,15,56,0,224
.byte 102,15,56,0,233
pxor %xmm5,%xmm2
movdqa %xmm4,%xmm5
psllq $60,%xmm5
movdqa %xmm5,%xmm6
pslldq $8,%xmm6
pxor %xmm6,%xmm3
psrldq $8,%xmm5
pxor %xmm5,%xmm2
psrlq $4,%xmm4
pxor %xmm4,%xmm2
subq $1,%rax
jnz L$oop_row_6
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $1,%xmm3
pxor %xmm3,%xmm2
psrlq $5,%xmm3
pxor %xmm3,%xmm2
pxor %xmm3,%xmm3
movdqa %xmm2,%xmm0
leaq -256(%rsi),%rsi
leaq 16(%rdx),%rdx
subq $16,%rcx
jnz L$oop_ghash
.byte 102,65,15,56,0,194
movdqu %xmm0,(%rdi)
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.byte 0xf3,0xc3
L$ghash_seh_end:
.p2align 4
L$reverse_bytes:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
L$low4_mask:
.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
#endif
#endif // defined(__x86_64__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif


@@ -0,0 +1,337 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.globl gcm_init_clmul
.hidden gcm_init_clmul
.type gcm_init_clmul,@function
.align 16
gcm_init_clmul:
.L_gcm_init_clmul_begin:
movl 4(%esp),%edx
movl 8(%esp),%eax
call .L000pic
.L000pic:
popl %ecx
leal .Lbswap-.L000pic(%ecx),%ecx
movdqu (%eax),%xmm2
pshufd $78,%xmm2,%xmm2
pshufd $255,%xmm2,%xmm4
movdqa %xmm2,%xmm3
psllq $1,%xmm2
pxor %xmm5,%xmm5
psrlq $63,%xmm3
pcmpgtd %xmm4,%xmm5
pslldq $8,%xmm3
por %xmm3,%xmm2
pand 16(%ecx),%xmm5
pxor %xmm5,%xmm2
movdqa %xmm2,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
pshufd $78,%xmm2,%xmm3
pshufd $78,%xmm0,%xmm4
pxor %xmm2,%xmm3
movdqu %xmm2,(%edx)
pxor %xmm0,%xmm4
movdqu %xmm0,16(%edx)
.byte 102,15,58,15,227,8
movdqu %xmm4,32(%edx)
ret
.size gcm_init_clmul,.-.L_gcm_init_clmul_begin
.globl gcm_gmult_clmul
.hidden gcm_gmult_clmul
.type gcm_gmult_clmul,@function
.align 16
gcm_gmult_clmul:
.L_gcm_gmult_clmul_begin:
movl 4(%esp),%eax
movl 8(%esp),%edx
call .L001pic
.L001pic:
popl %ecx
leal .Lbswap-.L001pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movups (%edx),%xmm2
.byte 102,15,56,0,197
movups 32(%edx),%xmm4
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
ret
.size gcm_gmult_clmul,.-.L_gcm_gmult_clmul_begin
.globl gcm_ghash_clmul
.hidden gcm_ghash_clmul
.type gcm_ghash_clmul,@function
.align 16
gcm_ghash_clmul:
.L_gcm_ghash_clmul_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%eax
movl 24(%esp),%edx
movl 28(%esp),%esi
movl 32(%esp),%ebx
call .L002pic
.L002pic:
popl %ecx
leal .Lbswap-.L002pic(%ecx),%ecx
movdqu (%eax),%xmm0
movdqa (%ecx),%xmm5
movdqu (%edx),%xmm2
.byte 102,15,56,0,197
subl $16,%ebx
jz .L003odd_tail
movdqu (%esi),%xmm3
movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221
.byte 102,15,56,0,245
movdqu 32(%edx),%xmm5
pxor %xmm3,%xmm0
pshufd $78,%xmm6,%xmm3
movdqa %xmm6,%xmm7
pxor %xmm6,%xmm3
leal 32(%esi),%esi
.byte 102,15,58,68,242,0
.byte 102,15,58,68,250,17
.byte 102,15,58,68,221,0
movups 16(%edx),%xmm2
nop
subl $32,%ebx
jbe .L004even_tail
jmp .L005mod_loop
.align 32
.L005mod_loop:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
nop
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movups (%edx),%xmm2
xorps %xmm6,%xmm0
movdqa (%ecx),%xmm5
xorps %xmm7,%xmm1
movdqu (%esi),%xmm7
pxor %xmm0,%xmm3
movdqu 16(%esi),%xmm6
pxor %xmm1,%xmm3
.byte 102,15,56,0,253
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
.byte 102,15,56,0,245
pxor %xmm7,%xmm1
movdqa %xmm6,%xmm7
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0
movups 32(%edx),%xmm5
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
pshufd $78,%xmm7,%xmm3
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm7,%xmm3
pxor %xmm4,%xmm1
.byte 102,15,58,68,250,17
movups 16(%edx),%xmm2
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0
leal 32(%esi),%esi
subl $32,%ebx
ja .L005mod_loop
.L004even_tail:
pshufd $78,%xmm0,%xmm4
movdqa %xmm0,%xmm1
pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,229,16
movdqa (%ecx),%xmm5
xorps %xmm6,%xmm0
xorps %xmm7,%xmm1
pxor %xmm0,%xmm3
pxor %xmm1,%xmm3
pxor %xmm3,%xmm4
movdqa %xmm4,%xmm3
psrldq $8,%xmm4
pslldq $8,%xmm3
pxor %xmm4,%xmm1
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
testl %ebx,%ebx
jnz .L006done
movups (%edx),%xmm2
.L003odd_tail:
movdqu (%esi),%xmm3
.byte 102,15,56,0,221
pxor %xmm3,%xmm0
movdqa %xmm0,%xmm1
pshufd $78,%xmm0,%xmm3
pshufd $78,%xmm2,%xmm4
pxor %xmm0,%xmm3
pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0
.byte 102,15,58,68,202,17
.byte 102,15,58,68,220,0
xorps %xmm0,%xmm3
xorps %xmm1,%xmm3
movdqa %xmm3,%xmm4
psrldq $8,%xmm3
pslldq $8,%xmm4
pxor %xmm3,%xmm1
pxor %xmm4,%xmm0
movdqa %xmm0,%xmm4
movdqa %xmm0,%xmm3
psllq $5,%xmm0
pxor %xmm0,%xmm3
psllq $1,%xmm0
pxor %xmm3,%xmm0
psllq $57,%xmm0
movdqa %xmm0,%xmm3
pslldq $8,%xmm0
psrldq $8,%xmm3
pxor %xmm4,%xmm0
pxor %xmm3,%xmm1
movdqa %xmm0,%xmm4
psrlq $1,%xmm0
pxor %xmm4,%xmm1
pxor %xmm0,%xmm4
psrlq $5,%xmm0
pxor %xmm4,%xmm0
psrlq $1,%xmm0
pxor %xmm1,%xmm0
.L006done:
.byte 102,15,56,0,197
movdqu %xmm0,(%eax)
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size gcm_ghash_clmul,.-.L_gcm_ghash_clmul_begin
.align 64
.Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif
.section .note.GNU-stack,"",@progbits
#endif // defined(__i386__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
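The clmul routines above use the Karatsuba arrangement their comments name: three pclmulqdq multiplies (low halves, high halves, and the XOR of the halves) replace four, and the middle term is recovered by XORing the other two products back in. Below is a self-contained Swift sketch of that layout, illustrative only; the scalar clmul helper is repeated here so the sketch stands on its own, and nothing in it is part of the vendored sources.

/// Scalar stand-in for one pclmulqdq instruction.
func clmul(_ a: UInt64, _ b: UInt64) -> (hi: UInt64, lo: UInt64) {
    var hi: UInt64 = 0
    var lo: UInt64 = 0
    for i in 0..<64 where (b >> UInt64(i)) & 1 == 1 {
        lo ^= a << UInt64(i)
        if i > 0 { hi ^= a >> UInt64(64 - i) }
    }
    return (hi: hi, lo: lo)
}

/// Karatsuba split of a 128×128-bit carry-less multiply into three products.
/// The final shift-and-XOR recombination (the pslldq/psrldq sequence in the
/// assembly) is left out; only the three partial products are shown.
func karatsuba128(aHi: UInt64, aLo: UInt64, bHi: UInt64, bLo: UInt64)
    -> (high: (hi: UInt64, lo: UInt64),
        mid: (hi: UInt64, lo: UInt64),
        low: (hi: UInt64, lo: UInt64)) {
    let low  = clmul(aLo, bLo)               // pclmulqdq $0x00: low halves
    let high = clmul(aHi, bHi)               // pclmulqdq $0x11: high halves
    var mid  = clmul(aLo ^ aHi, bLo ^ bHi)   // multiply of the XORed halves
    mid.lo ^= low.lo ^ high.lo               // fold back: mid = aLo*bHi XOR aHi*bLo
    mid.hi ^= low.hi ^ high.hi
    return (high: high, mid: mid, low: low)
}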

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,263 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__APPLE__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
.text
.code 32
#undef __thumb2__
.globl _gcm_init_v8
.private_extern _gcm_init_v8
#ifdef __thumb2__
.thumb_func _gcm_init_v8
#endif
.align 4
_gcm_init_v8:
vld1.64 {q9},[r1] @ load input H
vmov.i8 q11,#0xe1
vshl.i64 q11,q11,#57 @ 0xc2.0
vext.8 q3,q9,q9,#8
vshr.u64 q10,q11,#63
vdup.32 q9,d18[1]
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
vshr.u64 q10,q3,#63
vshr.s32 q9,q9,#31 @ broadcast carry bit
vand q10,q10,q8
vshl.i64 q3,q3,#1
vext.8 q10,q10,q10,#8
vand q8,q8,q9
vorr q3,q3,q10 @ H<<<=1
veor q12,q3,q8 @ twisted H
vst1.64 {q12},[r0]! @ store Htable[0]
@ calculate H^2
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
veor q8,q8,q12
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q14,q0,q10
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
bx lr
.globl _gcm_gmult_v8
.private_extern _gcm_gmult_v8
#ifdef __thumb2__
.thumb_func _gcm_gmult_v8
#endif
.align 4
_gcm_gmult_v8:
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
vshl.u64 q11,q11,#57
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q3,q9,q9,#8
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
bx lr
.globl _gcm_ghash_v8
.private_extern _gcm_ghash_v8
#ifdef __thumb2__
.thumb_func _gcm_ghash_v8
#endif
.align 4
_gcm_ghash_v8:
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
vld1.64 {q0},[r0] @ load [rotated] Xi
@ "[rotated]" means that
@ loaded value would have
@ to be rotated in order to
@ make it appear as in
@ algorithm specification
subs r3,r3,#32 @ see if r3 is 32 or larger
mov r12,#16 @ r12 is used as post-
@ increment for input pointer;
@ as loop is modulo-scheduled
@ r12 is zeroed just in time
@ to preclude overstepping
@ inp[len], which means that
@ last block[s] are actually
@ loaded twice, but last
@ copy is not processed
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
vmov.i8 q11,#0xe1
vld1.64 {q14},[r1]
moveq r12,#0 @ is it time to zero r12?
vext.8 q0,q0,q0,#8 @ rotate Xi
vld1.64 {q8},[r2]! @ load [rotated] I[0]
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
#ifndef __ARMEB__
vrev64.8 q8,q8
vrev64.8 q0,q0
#endif
vext.8 q3,q8,q8,#8 @ rotate I[0]
blo Lodd_tail_v8 @ r3 was less than 32
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q7,q9,q9,#8
veor q3,q3,q0 @ I[i]^=Xi
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q9,q9,q7 @ Karatsuba pre-processing
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
b Loop_mod2x_v8
.align 4
Loop_mod2x_v8:
vext.8 q10,q3,q3,#8
subs r3,r3,#32 @ is there more data?
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
movlo r12,#0 @ is it time to zero r12?
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
veor q10,q10,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
veor q0,q0,q4 @ accumulate
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
veor q2,q2,q6
moveq r12,#0 @ is it time to zero r12?
veor q1,q1,q5
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
#ifndef __ARMEB__
vrev64.8 q8,q8
#endif
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
vext.8 q7,q9,q9,#8
vext.8 q3,q8,q8,#8
veor q0,q1,q10
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q3,q3,q2 @ accumulate q3 early
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q3,q3,q10
veor q9,q9,q7 @ Karatsuba pre-processing
veor q3,q3,q0
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
bhs Loop_mod2x_v8 @ there was at least 32 more bytes
veor q2,q2,q10
vext.8 q3,q8,q8,#8 @ re-construct q3
adds r3,r3,#32 @ re-construct r3
veor q0,q0,q2 @ re-construct q0
beq Ldone_v8 @ is r3 zero?
Lodd_tail_v8:
vext.8 q10,q0,q0,#8
veor q3,q3,q0 @ inp^=Xi
veor q9,q8,q10 @ q9 is rotated inp^Xi
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
Ldone_v8:
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
bx lr
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM
#endif // defined(__arm__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif


@@ -0,0 +1,260 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__arm__) && defined(__linux__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
.text
.fpu neon
.code 32
#undef __thumb2__
.globl gcm_init_v8
.hidden gcm_init_v8
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
vld1.64 {q9},[r1] @ load input H
vmov.i8 q11,#0xe1
vshl.i64 q11,q11,#57 @ 0xc2.0
vext.8 q3,q9,q9,#8
vshr.u64 q10,q11,#63
vdup.32 q9,d18[1]
vext.8 q8,q10,q11,#8 @ t0=0xc2....01
vshr.u64 q10,q3,#63
vshr.s32 q9,q9,#31 @ broadcast carry bit
vand q10,q10,q8
vshl.i64 q3,q3,#1
vext.8 q10,q10,q10,#8
vand q8,q8,q9
vorr q3,q3,q10 @ H<<<=1
veor q12,q3,q8 @ twisted H
vst1.64 {q12},[r0]! @ store Htable[0]
@ calculate H^2
vext.8 q8,q12,q12,#8 @ Karatsuba pre-processing
.byte 0xa8,0x0e,0xa8,0xf2 @ pmull q0,q12,q12
veor q8,q8,q12
.byte 0xa9,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q12
.byte 0xa0,0x2e,0xa0,0xf2 @ pmull q1,q8,q8
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q14,q0,q10
vext.8 q9,q14,q14,#8 @ Karatsuba pre-processing
veor q9,q9,q14
vext.8 q13,q8,q9,#8 @ pack Karatsuba pre-processed
vst1.64 {q13,q14},[r0] @ store Htable[1..2]
bx lr
.size gcm_init_v8,.-gcm_init_v8
.globl gcm_gmult_v8
.hidden gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
vld1.64 {q9},[r0] @ load Xi
vmov.i8 q11,#0xe1
vld1.64 {q12,q13},[r1] @ load twisted H, ...
vshl.u64 q11,q11,#57
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q3,q9,q9,#8
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
bx lr
.size gcm_gmult_v8,.-gcm_gmult_v8
.globl gcm_ghash_v8
.hidden gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
vstmdb sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
vld1.64 {q0},[r0] @ load [rotated] Xi
@ "[rotated]" means that
@ loaded value would have
@ to be rotated in order to
@ make it appear as in
@ algorithm specification
subs r3,r3,#32 @ see if r3 is 32 or larger
mov r12,#16 @ r12 is used as post-
@ increment for input pointer;
@ as loop is modulo-scheduled
@ r12 is zeroed just in time
@ to preclude overstepping
@ inp[len], which means that
@ last block[s] are actually
@ loaded twice, but last
@ copy is not processed
vld1.64 {q12,q13},[r1]! @ load twisted H, ..., H^2
vmov.i8 q11,#0xe1
vld1.64 {q14},[r1]
moveq r12,#0 @ is it time to zero r12?
vext.8 q0,q0,q0,#8 @ rotate Xi
vld1.64 {q8},[r2]! @ load [rotated] I[0]
vshl.u64 q11,q11,#57 @ compose 0xc2.0 constant
#ifndef __ARMEB__
vrev64.8 q8,q8
vrev64.8 q0,q0
#endif
vext.8 q3,q8,q8,#8 @ rotate I[0]
blo .Lodd_tail_v8 @ r3 was less than 32
vld1.64 {q9},[r2],r12 @ load [rotated] I[1]
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vext.8 q7,q9,q9,#8
veor q3,q3,q0 @ I[i]^=Xi
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q9,q9,q7 @ Karatsuba pre-processing
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
b .Loop_mod2x_v8
.align 4
.Loop_mod2x_v8:
vext.8 q10,q3,q3,#8
subs r3,r3,#32 @ is there more data?
.byte 0x86,0x0e,0xac,0xf2 @ pmull q0,q14,q3 @ H^2.lo·Xi.lo
movlo r12,#0 @ is it time to zero r12?
.byte 0xa2,0xae,0xaa,0xf2 @ pmull q5,q13,q9
veor q10,q10,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xad,0xf2 @ pmull2 q2,q14,q3 @ H^2.hi·Xi.hi
veor q0,q0,q4 @ accumulate
.byte 0xa5,0x2e,0xab,0xf2 @ pmull2 q1,q13,q10 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {q8},[r2],r12 @ load [rotated] I[i+2]
veor q2,q2,q6
moveq r12,#0 @ is it time to zero r12?
veor q1,q1,q5
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
vld1.64 {q9},[r2],r12 @ load [rotated] I[i+3]
#ifndef __ARMEB__
vrev64.8 q8,q8
#endif
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
#ifndef __ARMEB__
vrev64.8 q9,q9
#endif
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
vext.8 q7,q9,q9,#8
vext.8 q3,q8,q8,#8
veor q0,q1,q10
.byte 0x8e,0x8e,0xa8,0xf2 @ pmull q4,q12,q7 @ H·Ii+1
veor q3,q3,q2 @ accumulate q3 early
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q3,q3,q10
veor q9,q9,q7 @ Karatsuba pre-processing
veor q3,q3,q0
.byte 0x8f,0xce,0xa9,0xf2 @ pmull2 q6,q12,q7
bhs .Loop_mod2x_v8 @ there was at least 32 more bytes
veor q2,q2,q10
vext.8 q3,q8,q8,#8 @ re-construct q3
adds r3,r3,#32 @ re-construct r3
veor q0,q0,q2 @ re-construct q0
beq .Ldone_v8 @ is r3 zero?
.Lodd_tail_v8:
vext.8 q10,q0,q0,#8
veor q3,q3,q0 @ inp^=Xi
veor q9,q8,q10 @ q9 is rotated inp^Xi
.byte 0x86,0x0e,0xa8,0xf2 @ pmull q0,q12,q3 @ H.lo·Xi.lo
veor q9,q9,q3 @ Karatsuba pre-processing
.byte 0x87,0x4e,0xa9,0xf2 @ pmull2 q2,q12,q3 @ H.hi·Xi.hi
.byte 0xa2,0x2e,0xaa,0xf2 @ pmull q1,q13,q9 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 q9,q0,q2,#8 @ Karatsuba post-processing
veor q10,q0,q2
veor q1,q1,q9
veor q1,q1,q10
.byte 0x26,0x4e,0xe0,0xf2 @ pmull q10,q0,q11 @ 1st phase of reduction
vmov d4,d3 @ Xh|Xm - 256-bit result
vmov d3,d0 @ Xm is rotated Xl
veor q0,q1,q10
vext.8 q10,q0,q0,#8 @ 2nd phase of reduction
.byte 0x26,0x0e,0xa0,0xf2 @ pmull q0,q0,q11
veor q10,q10,q2
veor q0,q0,q10
.Ldone_v8:
#ifndef __ARMEB__
vrev64.8 q0,q0
#endif
vext.8 q0,q0,q0,#8
vst1.64 {q0},[r0] @ write out Xi
vldmia sp!,{d8,d9,d10,d11,d12,d13,d14,d15} @ 32-bit ABI says so
bx lr
.size gcm_ghash_v8,.-gcm_ghash_v8
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__arm__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif


@@ -0,0 +1,253 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__aarch64__) && defined(__APPLE__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
.text
.globl _gcm_init_v8
.private_extern _gcm_init_v8
.align 4
_gcm_init_v8:
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
ext v3.16b,v17.16b,v17.16b,#8
ushr v18.2d,v19.2d,#63
dup v17.4s,v17.s[1]
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
ushr v18.2d,v3.2d,#63
sshr v17.4s,v17.4s,#31 //broadcast carry bit
and v18.16b,v18.16b,v16.16b
shl v3.2d,v3.2d,#1
ext v18.16b,v18.16b,v18.16b,#8
and v16.16b,v16.16b,v17.16b
orr v3.16b,v3.16b,v18.16b //H<<<=1
eor v20.16b,v3.16b,v16.16b //twisted H
st1 {v20.2d},[x0],#16 //store Htable[0]
//calculate H^2
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
pmull v0.1q,v20.1d,v20.1d
eor v16.16b,v16.16b,v20.16b
pmull2 v2.1q,v20.2d,v20.2d
pmull v1.1q,v16.1d,v16.1d
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v22.16b,v0.16b,v18.16b
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
ret
.globl _gcm_gmult_v8
.private_extern _gcm_gmult_v8
.align 4
_gcm_gmult_v8:
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
shl v19.2d,v19.2d,#57
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v3.16b,v17.16b,v17.16b,#8
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.globl _gcm_ghash_v8
.private_extern _gcm_ghash_v8
.align 4
_gcm_ghash_v8:
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
//to be rotated in order to
//make it appear as in
//algorithm specification
subs x3,x3,#32 //see if x3 is 32 or larger
mov x12,#16 //x12 is used as post-
//increment for input pointer;
//as loop is modulo-scheduled
//x12 is zeroed just in time
//to preclude overstepping
//inp[len], which means that
//last block[s] are actually
//loaded twice, but last
//copy is not processed
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v22.2d},[x1]
csel x12,xzr,x12,eq //is it time to zero x12?
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
rev64 v0.16b,v0.16b
#endif
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
b.lo Lodd_tail_v8 //x3 was less than 32
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v7.16b,v17.16b,v17.16b,#8
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
pmull2 v6.1q,v20.2d,v7.2d
b Loop_mod2x_v8
.align 4
Loop_mod2x_v8:
ext v18.16b,v3.16b,v3.16b,#8
subs x3,x3,#32 //is there more data?
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
csel x12,xzr,x12,lo //is it time to zero x12?
pmull v5.1q,v21.1d,v17.1d
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
eor v0.16b,v0.16b,v4.16b //accumulate
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
eor v2.16b,v2.16b,v6.16b
csel x12,xzr,x12,eq //is it time to zero x12?
eor v1.16b,v1.16b,v5.16b
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
#endif
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v7.16b,v17.16b,v17.16b,#8
ext v3.16b,v16.16b,v16.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v3.16b,v3.16b,v18.16b
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
eor v3.16b,v3.16b,v0.16b
pmull2 v6.1q,v20.2d,v7.2d
b.hs Loop_mod2x_v8 //there was at least 32 more bytes
eor v2.16b,v2.16b,v18.16b
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
adds x3,x3,#32 //re-construct x3
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
b.eq Ldone_v8 //is x3 zero?
Lodd_tail_v8:
ext v18.16b,v0.16b,v0.16b,#8
eor v3.16b,v3.16b,v0.16b //inp^=Xi
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
Ldone_v8:
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif // !OPENSSL_NO_ASM
#endif // defined(__aarch64__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif


@@ -0,0 +1,256 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__aarch64__) && defined(__linux__)
// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.
#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#if !defined(OPENSSL_NO_ASM)
#if defined(__aarch64__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
#include <CBigNumBoringSSL_arm_arch.h>
.text
.arch armv8-a+crypto
.globl gcm_init_v8
.hidden gcm_init_v8
.type gcm_init_v8,%function
.align 4
gcm_init_v8:
ld1 {v17.2d},[x1] //load input H
movi v19.16b,#0xe1
shl v19.2d,v19.2d,#57 //0xc2.0
ext v3.16b,v17.16b,v17.16b,#8
ushr v18.2d,v19.2d,#63
dup v17.4s,v17.s[1]
ext v16.16b,v18.16b,v19.16b,#8 //t0=0xc2....01
ushr v18.2d,v3.2d,#63
sshr v17.4s,v17.4s,#31 //broadcast carry bit
and v18.16b,v18.16b,v16.16b
shl v3.2d,v3.2d,#1
ext v18.16b,v18.16b,v18.16b,#8
and v16.16b,v16.16b,v17.16b
orr v3.16b,v3.16b,v18.16b //H<<<=1
eor v20.16b,v3.16b,v16.16b //twisted H
st1 {v20.2d},[x0],#16 //store Htable[0]
//calculate H^2
ext v16.16b,v20.16b,v20.16b,#8 //Karatsuba pre-processing
pmull v0.1q,v20.1d,v20.1d
eor v16.16b,v16.16b,v20.16b
pmull2 v2.1q,v20.2d,v20.2d
pmull v1.1q,v16.1d,v16.1d
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v22.16b,v0.16b,v18.16b
ext v17.16b,v22.16b,v22.16b,#8 //Karatsuba pre-processing
eor v17.16b,v17.16b,v22.16b
ext v21.16b,v16.16b,v17.16b,#8 //pack Karatsuba pre-processed
st1 {v21.2d,v22.2d},[x0] //store Htable[1..2]
ret
.size gcm_init_v8,.-gcm_init_v8
.globl gcm_gmult_v8
.hidden gcm_gmult_v8
.type gcm_gmult_v8,%function
.align 4
gcm_gmult_v8:
ld1 {v17.2d},[x0] //load Xi
movi v19.16b,#0xe1
ld1 {v20.2d,v21.2d},[x1] //load twisted H, ...
shl v19.2d,v19.2d,#57
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v3.16b,v17.16b,v17.16b,#8
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.size gcm_gmult_v8,.-gcm_gmult_v8
.globl gcm_ghash_v8
.hidden gcm_ghash_v8
.type gcm_ghash_v8,%function
.align 4
gcm_ghash_v8:
ld1 {v0.2d},[x0] //load [rotated] Xi
//"[rotated]" means that
//loaded value would have
//to be rotated in order to
//make it appear as in
//algorithm specification
subs x3,x3,#32 //see if x3 is 32 or larger
mov x12,#16 //x12 is used as post-
//increment for input pointer;
//as loop is modulo-scheduled
//x12 is zeroed just in time
//to preclude overstepping
//inp[len], which means that
//last block[s] are actually
//loaded twice, but last
//copy is not processed
ld1 {v20.2d,v21.2d},[x1],#32 //load twisted H, ..., H^2
movi v19.16b,#0xe1
ld1 {v22.2d},[x1]
csel x12,xzr,x12,eq //is it time to zero x12?
ext v0.16b,v0.16b,v0.16b,#8 //rotate Xi
ld1 {v16.2d},[x2],#16 //load [rotated] I[0]
shl v19.2d,v19.2d,#57 //compose 0xc2.0 constant
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
rev64 v0.16b,v0.16b
#endif
ext v3.16b,v16.16b,v16.16b,#8 //rotate I[0]
b.lo .Lodd_tail_v8 //x3 was less than 32
ld1 {v17.2d},[x2],x12 //load [rotated] I[1]
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ext v7.16b,v17.16b,v17.16b,#8
eor v3.16b,v3.16b,v0.16b //I[i]^=Xi
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
pmull2 v6.1q,v20.2d,v7.2d
b .Loop_mod2x_v8
.align 4
.Loop_mod2x_v8:
ext v18.16b,v3.16b,v3.16b,#8
subs x3,x3,#32 //is there more data?
pmull v0.1q,v22.1d,v3.1d //H^2.lo·Xi.lo
csel x12,xzr,x12,lo //is it time to zero x12?
pmull v5.1q,v21.1d,v17.1d
eor v18.16b,v18.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v22.2d,v3.2d //H^2.hi·Xi.hi
eor v0.16b,v0.16b,v4.16b //accumulate
pmull2 v1.1q,v21.2d,v18.2d //(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
ld1 {v16.2d},[x2],x12 //load [rotated] I[i+2]
eor v2.16b,v2.16b,v6.16b
csel x12,xzr,x12,eq //is it time to zero x12?
eor v1.16b,v1.16b,v5.16b
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
ld1 {v17.2d},[x2],x12 //load [rotated] I[i+3]
#ifndef __ARMEB__
rev64 v16.16b,v16.16b
#endif
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
#ifndef __ARMEB__
rev64 v17.16b,v17.16b
#endif
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
ext v7.16b,v17.16b,v17.16b,#8
ext v3.16b,v16.16b,v16.16b,#8
eor v0.16b,v1.16b,v18.16b
pmull v4.1q,v20.1d,v7.1d //H·Ii+1
eor v3.16b,v3.16b,v2.16b //accumulate v3.16b early
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v3.16b,v3.16b,v18.16b
eor v17.16b,v17.16b,v7.16b //Karatsuba pre-processing
eor v3.16b,v3.16b,v0.16b
pmull2 v6.1q,v20.2d,v7.2d
b.hs .Loop_mod2x_v8 //there was at least 32 more bytes
eor v2.16b,v2.16b,v18.16b
ext v3.16b,v16.16b,v16.16b,#8 //re-construct v3.16b
adds x3,x3,#32 //re-construct x3
eor v0.16b,v0.16b,v2.16b //re-construct v0.16b
b.eq .Ldone_v8 //is x3 zero?
.Lodd_tail_v8:
ext v18.16b,v0.16b,v0.16b,#8
eor v3.16b,v3.16b,v0.16b //inp^=Xi
eor v17.16b,v16.16b,v18.16b //v17.16b is rotated inp^Xi
pmull v0.1q,v20.1d,v3.1d //H.lo·Xi.lo
eor v17.16b,v17.16b,v3.16b //Karatsuba pre-processing
pmull2 v2.1q,v20.2d,v3.2d //H.hi·Xi.hi
pmull v1.1q,v21.1d,v17.1d //(H.lo+H.hi)·(Xi.lo+Xi.hi)
ext v17.16b,v0.16b,v2.16b,#8 //Karatsuba post-processing
eor v18.16b,v0.16b,v2.16b
eor v1.16b,v1.16b,v17.16b
eor v1.16b,v1.16b,v18.16b
pmull v18.1q,v0.1d,v19.1d //1st phase of reduction
ins v2.d[0],v1.d[1]
ins v1.d[1],v0.d[0]
eor v0.16b,v1.16b,v18.16b
ext v18.16b,v0.16b,v0.16b,#8 //2nd phase of reduction
pmull v0.1q,v0.1d,v19.1d
eor v18.16b,v18.16b,v2.16b
eor v0.16b,v0.16b,v18.16b
.Ldone_v8:
#ifndef __ARMEB__
rev64 v0.16b,v0.16b
#endif
ext v0.16b,v0.16b,v0.16b,#8
st1 {v0.2d},[x0] //write out Xi
ret
.size gcm_ghash_v8,.-gcm_ghash_v8
.byte 71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align 2
.align 2
#endif
#endif // !OPENSSL_NO_ASM
.section .note.GNU-stack,"",%progbits
#endif // defined(__aarch64__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
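Throughout the v8 routines above, Xi and each input block are wrapped in rev64 plus ext #8 on little-endian builds (the #ifndef __ARMEB__ blocks): the 16-byte GHASH state is stored big-endian in memory, while the field arithmetic works on two native 64-bit halves, so the bytes are reversed on load and reversed back on store. A rough scalar equivalent in Swift, illustrative only and not part of the package:

/// Illustrative only: read a 16-byte big-endian GHASH state as two 64-bit
/// halves, the same reordering that rev64 + ext #8 perform on the SIMD register.
func loadXi(_ bytes: [UInt8]) -> (hi: UInt64, lo: UInt64) {
    precondition(bytes.count == 16)
    func be64(_ slice: ArraySlice<UInt8>) -> UInt64 {
        slice.reduce(UInt64(0)) { ($0 << 8) | UInt64($1) }   // big-endian bytes -> UInt64
    }
    return (hi: be64(bytes[0..<8]), lo: be64(bytes[8..<16]))
}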


@@ -0,0 +1,695 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__i386__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.globl md5_block_asm_data_order
.hidden md5_block_asm_data_order
.type md5_block_asm_data_order,@function
.align 16
md5_block_asm_data_order:
.L_md5_block_asm_data_order_begin:
pushl %esi
pushl %edi
movl 12(%esp),%edi
movl 16(%esp),%esi
movl 20(%esp),%ecx
pushl %ebp
shll $6,%ecx
pushl %ebx
addl %esi,%ecx
subl $64,%ecx
movl (%edi),%eax
pushl %ecx
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
.L000start:
movl %ecx,%edi
movl (%esi),%ebp
xorl %edx,%edi
andl %ebx,%edi
leal 3614090360(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 4(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 3905402710(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 8(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 606105819(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 12(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 3250441966(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 16(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 4118548399(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 20(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 1200080426(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 24(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 2821735955(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 28(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 4249261313(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 32(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 1770035416(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 36(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 2336552879(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 40(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 4294925233(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 44(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 2304563134(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 48(%esi),%ebp
addl %ecx,%ebx
xorl %edx,%edi
andl %ebx,%edi
leal 1804603682(%eax,%ebp,1),%eax
xorl %edx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $7,%eax
movl 52(%esi),%ebp
addl %ebx,%eax
xorl %ecx,%edi
andl %eax,%edi
leal 4254626195(%edx,%ebp,1),%edx
xorl %ecx,%edi
addl %edi,%edx
movl %eax,%edi
roll $12,%edx
movl 56(%esi),%ebp
addl %eax,%edx
xorl %ebx,%edi
andl %edx,%edi
leal 2792965006(%ecx,%ebp,1),%ecx
xorl %ebx,%edi
addl %edi,%ecx
movl %edx,%edi
roll $17,%ecx
movl 60(%esi),%ebp
addl %edx,%ecx
xorl %eax,%edi
andl %ecx,%edi
leal 1236535329(%ebx,%ebp,1),%ebx
xorl %eax,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $22,%ebx
movl 4(%esi),%ebp
addl %ecx,%ebx
leal 4129170786(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 24(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 3225465664(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 44(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 643717713(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl (%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 3921069994(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 3593408605(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 40(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 38016083(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 60(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 3634488961(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 16(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 3889429448(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 36(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 568446438(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 56(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 3275163606(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 12(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 4107603335(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 32(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 1163531501(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 52(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
leal 2850285829(%eax,%ebp,1),%eax
xorl %ebx,%edi
andl %edx,%edi
movl 8(%esi),%ebp
xorl %ecx,%edi
addl %edi,%eax
movl %ebx,%edi
roll $5,%eax
addl %ebx,%eax
leal 4243563512(%edx,%ebp,1),%edx
xorl %eax,%edi
andl %ecx,%edi
movl 28(%esi),%ebp
xorl %ebx,%edi
addl %edi,%edx
movl %eax,%edi
roll $9,%edx
addl %eax,%edx
leal 1735328473(%ecx,%ebp,1),%ecx
xorl %edx,%edi
andl %ebx,%edi
movl 48(%esi),%ebp
xorl %eax,%edi
addl %edi,%ecx
movl %edx,%edi
roll $14,%ecx
addl %edx,%ecx
leal 2368359562(%ebx,%ebp,1),%ebx
xorl %ecx,%edi
andl %eax,%edi
movl 20(%esi),%ebp
xorl %edx,%edi
addl %edi,%ebx
movl %ecx,%edi
roll $20,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 4294588738(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 32(%esi),%ebp
movl %ebx,%edi
leal 2272392833(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 44(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 1839030562(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 56(%esi),%ebp
movl %edx,%edi
leal 4259657740(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 4(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 2763975236(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 16(%esi),%ebp
movl %ebx,%edi
leal 1272893353(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 28(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 4139469664(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 40(%esi),%ebp
movl %edx,%edi
leal 3200236656(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 52(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 681279174(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl (%esi),%ebp
movl %ebx,%edi
leal 3936430074(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 12(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 3572445317(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 24(%esi),%ebp
movl %edx,%edi
leal 76029189(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl 36(%esi),%ebp
addl %edi,%ebx
movl %ecx,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
xorl %ebx,%edi
leal 3654602809(%eax,%ebp,1),%eax
addl %edi,%eax
roll $4,%eax
movl 48(%esi),%ebp
movl %ebx,%edi
leal 3873151461(%edx,%ebp,1),%edx
addl %ebx,%eax
xorl %ecx,%edi
xorl %eax,%edi
movl 60(%esi),%ebp
addl %edi,%edx
movl %eax,%edi
roll $11,%edx
addl %eax,%edx
xorl %ebx,%edi
xorl %edx,%edi
leal 530742520(%ecx,%ebp,1),%ecx
addl %edi,%ecx
roll $16,%ecx
movl 8(%esi),%ebp
movl %edx,%edi
leal 3299628645(%ebx,%ebp,1),%ebx
addl %edx,%ecx
xorl %eax,%edi
xorl %ecx,%edi
movl (%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $23,%ebx
addl %ecx,%ebx
xorl %edx,%edi
orl %ebx,%edi
leal 4096336452(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 28(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 1126891415(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 56(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 2878612391(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 20(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 4237533241(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 48(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 1700485571(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 12(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 2399980690(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 40(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 4293915773(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 4(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 2240044497(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 32(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 1873313359(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 60(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 4264355552(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 24(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 2734768916(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 52(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 1309151649(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 16(%esi),%ebp
addl %edi,%ebx
movl $-1,%edi
roll $21,%ebx
xorl %edx,%edi
addl %ecx,%ebx
orl %ebx,%edi
leal 4149444226(%eax,%ebp,1),%eax
xorl %ecx,%edi
movl 44(%esi),%ebp
addl %edi,%eax
movl $-1,%edi
roll $6,%eax
xorl %ecx,%edi
addl %ebx,%eax
orl %eax,%edi
leal 3174756917(%edx,%ebp,1),%edx
xorl %ebx,%edi
movl 8(%esi),%ebp
addl %edi,%edx
movl $-1,%edi
roll $10,%edx
xorl %ebx,%edi
addl %eax,%edx
orl %edx,%edi
leal 718787259(%ecx,%ebp,1),%ecx
xorl %eax,%edi
movl 36(%esi),%ebp
addl %edi,%ecx
movl $-1,%edi
roll $15,%ecx
xorl %eax,%edi
addl %edx,%ecx
orl %ecx,%edi
leal 3951481745(%ebx,%ebp,1),%ebx
xorl %edx,%edi
movl 24(%esp),%ebp
addl %edi,%ebx
addl $64,%esi
roll $21,%ebx
movl (%ebp),%edi
addl %ecx,%ebx
addl %edi,%eax
movl 4(%ebp),%edi
addl %edi,%ebx
movl 8(%ebp),%edi
addl %edi,%ecx
movl 12(%ebp),%edi
addl %edi,%edx
movl %eax,(%ebp)
movl %ebx,4(%ebp)
movl (%esp),%edi
movl %ecx,8(%ebp)
movl %edx,12(%ebp)
cmpl %esi,%edi
jae .L000start
popl %eax
popl %ebx
popl %ebp
popl %edi
popl %esi
ret
.size md5_block_asm_data_order,.-.L_md5_block_asm_data_order_begin
#endif
.section .note.GNU-stack,"",@progbits
#endif // defined(__i386__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@ -0,0 +1,709 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__linux__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.align 16
.globl md5_block_asm_data_order
.hidden md5_block_asm_data_order
.type md5_block_asm_data_order,@function
md5_block_asm_data_order:
.cfi_startproc
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset rbp,-16
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset rbx,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12,-32
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14,-40
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset r15,-48
.Lprologue:
movq %rdi,%rbp
shlq $6,%rdx
leaq (%rsi,%rdx,1),%rdi
movl 0(%rbp),%eax
movl 4(%rbp),%ebx
movl 8(%rbp),%ecx
movl 12(%rbp),%edx
cmpq %rdi,%rsi
je .Lend
.Lloop:
movl %eax,%r8d
movl %ebx,%r9d
movl %ecx,%r14d
movl %edx,%r15d
movl 0(%rsi),%r10d
movl %edx,%r11d
xorl %ecx,%r11d
leal -680876936(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 4(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -389564586(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 8(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal 606105819(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 12(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -1044525330(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 16(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal -176418897(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 20(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal 1200080426(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 24(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -1473231341(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 28(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -45705983(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 32(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal 1770035416(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 36(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -1958414417(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 40(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -42063(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 44(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -1990404162(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 48(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal 1804603682(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 52(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -40341101(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 56(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -1502002290(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 60(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal 1236535329(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 0(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
movl 4(%rsi),%r10d
movl %edx,%r11d
movl %edx,%r12d
notl %r11d
leal -165796510(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 24(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -1069501632(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 44(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal 643717713(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 0(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -373897302(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 20(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal -701558691(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 40(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal 38016083(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 60(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal -660478335(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 16(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -405537848(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 36(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal 568446438(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 56(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -1019803690(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 12(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal -187363961(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 32(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal 1163531501(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 52(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal -1444681467(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 8(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -51403784(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 28(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal 1735328473(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 48(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -1926607734(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 0(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
movl 20(%rsi),%r10d
movl %ecx,%r11d
leal -378558(%rax,%r10,1),%eax
movl 32(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -2022574463(%rdx,%r10,1),%edx
movl 44(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 1839030562(%rcx,%r10,1),%ecx
movl 56(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -35309556(%rbx,%r10,1),%ebx
movl 4(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -1530992060(%rax,%r10,1),%eax
movl 16(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal 1272893353(%rdx,%r10,1),%edx
movl 28(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -155497632(%rcx,%r10,1),%ecx
movl 40(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -1094730640(%rbx,%r10,1),%ebx
movl 52(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal 681279174(%rax,%r10,1),%eax
movl 0(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -358537222(%rdx,%r10,1),%edx
movl 12(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -722521979(%rcx,%r10,1),%ecx
movl 24(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal 76029189(%rbx,%r10,1),%ebx
movl 36(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -640364487(%rax,%r10,1),%eax
movl 48(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -421815835(%rdx,%r10,1),%edx
movl 60(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 530742520(%rcx,%r10,1),%ecx
movl 8(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -995338651(%rbx,%r10,1),%ebx
movl 0(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
movl 0(%rsi),%r10d
movl $0xffffffff,%r11d
xorl %edx,%r11d
leal -198630844(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 28(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal 1126891415(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 56(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1416354905(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 20(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -57434055(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 48(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1700485571(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 12(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1894986606(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 40(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1051523(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 4(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -2054922799(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 32(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1873313359(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 60(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -30611744(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 24(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1560198380(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 52(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal 1309151649(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 16(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal -145523070(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 44(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1120210379(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 8(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal 718787259(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 36(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -343485551(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 0(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
addl %r8d,%eax
addl %r9d,%ebx
addl %r14d,%ecx
addl %r15d,%edx
addq $64,%rsi
cmpq %rdi,%rsi
jb .Lloop
.Lend:
movl %eax,0(%rbp)
movl %ebx,4(%rbp)
movl %ecx,8(%rbp)
movl %edx,12(%rbp)
movq (%rsp),%r15
.cfi_restore r15
movq 8(%rsp),%r14
.cfi_restore r14
movq 16(%rsp),%r12
.cfi_restore r12
movq 24(%rsp),%rbx
.cfi_restore rbx
movq 32(%rsp),%rbp
.cfi_restore rbp
addq $40,%rsp
.cfi_adjust_cfa_offset -40
.Lepilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size md5_block_asm_data_order,.-md5_block_asm_data_order
#endif
.section .note.GNU-stack,"",@progbits
#endif // defined(__x86_64__) && defined(__linux__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
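For readers skimming the assembly above: the prologue of the x86-64 routine
(movq %rdi,%rbp / shlq $6,%rdx / leaq (%rsi,%rdx,1),%rdi) implies the System V
calling convention sketched below. The C prototype and argument names are an
assumption inferred from that register usage, not something declared in this
commit.

#include <stdint.h>
#include <stddef.h>

/* Hypothetical C-level view of the symbol defined above:
 *   %rdi -> the four 32-bit MD5 state words (loaded from 0/4/8/12(%rbp)),
 *   %rsi -> input bytes,
 *   %rdx -> number of 64-byte blocks (shlq $6 turns it into a byte length
 *           that is added to %rsi to form the end-of-input pointer). */
void md5_block_asm_data_order(uint32_t state[4], const uint8_t *data,
                              size_t num_blocks);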

View File

@ -0,0 +1,703 @@
#define BORINGSSL_PREFIX CBigNumBoringSSL
#if defined(__x86_64__) && defined(__APPLE__)
# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <CBigNumBoringSSL_boringssl_prefix_symbols_asm.h>
#endif
.text
.p2align 4
.globl _md5_block_asm_data_order
.private_extern _md5_block_asm_data_order
_md5_block_asm_data_order:
pushq %rbp
pushq %rbx
pushq %r12
pushq %r14
pushq %r15
L$prologue:
movq %rdi,%rbp
shlq $6,%rdx
leaq (%rsi,%rdx,1),%rdi
movl 0(%rbp),%eax
movl 4(%rbp),%ebx
movl 8(%rbp),%ecx
movl 12(%rbp),%edx
cmpq %rdi,%rsi
je L$end
L$loop:
movl %eax,%r8d
movl %ebx,%r9d
movl %ecx,%r14d
movl %edx,%r15d
movl 0(%rsi),%r10d
movl %edx,%r11d
xorl %ecx,%r11d
leal -680876936(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 4(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -389564586(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 8(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal 606105819(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 12(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -1044525330(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 16(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal -176418897(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 20(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal 1200080426(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 24(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -1473231341(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 28(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -45705983(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 32(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal 1770035416(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 36(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -1958414417(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 40(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -42063(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 44(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal -1990404162(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 48(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
xorl %ecx,%r11d
leal 1804603682(%rax,%r10,1),%eax
andl %ebx,%r11d
xorl %edx,%r11d
movl 52(%rsi),%r10d
addl %r11d,%eax
roll $7,%eax
movl %ecx,%r11d
addl %ebx,%eax
xorl %ebx,%r11d
leal -40341101(%rdx,%r10,1),%edx
andl %eax,%r11d
xorl %ecx,%r11d
movl 56(%rsi),%r10d
addl %r11d,%edx
roll $12,%edx
movl %ebx,%r11d
addl %eax,%edx
xorl %eax,%r11d
leal -1502002290(%rcx,%r10,1),%ecx
andl %edx,%r11d
xorl %ebx,%r11d
movl 60(%rsi),%r10d
addl %r11d,%ecx
roll $17,%ecx
movl %eax,%r11d
addl %edx,%ecx
xorl %edx,%r11d
leal 1236535329(%rbx,%r10,1),%ebx
andl %ecx,%r11d
xorl %eax,%r11d
movl 0(%rsi),%r10d
addl %r11d,%ebx
roll $22,%ebx
movl %edx,%r11d
addl %ecx,%ebx
movl 4(%rsi),%r10d
movl %edx,%r11d
movl %edx,%r12d
notl %r11d
leal -165796510(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 24(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -1069501632(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 44(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal 643717713(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 0(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -373897302(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 20(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal -701558691(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 40(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal 38016083(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 60(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal -660478335(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 16(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -405537848(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 36(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal 568446438(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 56(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -1019803690(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 12(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal -187363961(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 32(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal 1163531501(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 52(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
notl %r11d
leal -1444681467(%rax,%r10,1),%eax
andl %ebx,%r12d
andl %ecx,%r11d
movl 8(%rsi),%r10d
orl %r11d,%r12d
movl %ecx,%r11d
addl %r12d,%eax
movl %ecx,%r12d
roll $5,%eax
addl %ebx,%eax
notl %r11d
leal -51403784(%rdx,%r10,1),%edx
andl %eax,%r12d
andl %ebx,%r11d
movl 28(%rsi),%r10d
orl %r11d,%r12d
movl %ebx,%r11d
addl %r12d,%edx
movl %ebx,%r12d
roll $9,%edx
addl %eax,%edx
notl %r11d
leal 1735328473(%rcx,%r10,1),%ecx
andl %edx,%r12d
andl %eax,%r11d
movl 48(%rsi),%r10d
orl %r11d,%r12d
movl %eax,%r11d
addl %r12d,%ecx
movl %eax,%r12d
roll $14,%ecx
addl %edx,%ecx
notl %r11d
leal -1926607734(%rbx,%r10,1),%ebx
andl %ecx,%r12d
andl %edx,%r11d
movl 0(%rsi),%r10d
orl %r11d,%r12d
movl %edx,%r11d
addl %r12d,%ebx
movl %edx,%r12d
roll $20,%ebx
addl %ecx,%ebx
movl 20(%rsi),%r10d
movl %ecx,%r11d
leal -378558(%rax,%r10,1),%eax
movl 32(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -2022574463(%rdx,%r10,1),%edx
movl 44(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 1839030562(%rcx,%r10,1),%ecx
movl 56(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -35309556(%rbx,%r10,1),%ebx
movl 4(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -1530992060(%rax,%r10,1),%eax
movl 16(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal 1272893353(%rdx,%r10,1),%edx
movl 28(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -155497632(%rcx,%r10,1),%ecx
movl 40(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -1094730640(%rbx,%r10,1),%ebx
movl 52(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal 681279174(%rax,%r10,1),%eax
movl 0(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -358537222(%rdx,%r10,1),%edx
movl 12(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal -722521979(%rcx,%r10,1),%ecx
movl 24(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal 76029189(%rbx,%r10,1),%ebx
movl 36(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
leal -640364487(%rax,%r10,1),%eax
movl 48(%rsi),%r10d
xorl %edx,%r11d
xorl %ebx,%r11d
addl %r11d,%eax
roll $4,%eax
movl %ebx,%r11d
addl %ebx,%eax
leal -421815835(%rdx,%r10,1),%edx
movl 60(%rsi),%r10d
xorl %ecx,%r11d
xorl %eax,%r11d
addl %r11d,%edx
roll $11,%edx
movl %eax,%r11d
addl %eax,%edx
leal 530742520(%rcx,%r10,1),%ecx
movl 8(%rsi),%r10d
xorl %ebx,%r11d
xorl %edx,%r11d
addl %r11d,%ecx
roll $16,%ecx
movl %edx,%r11d
addl %edx,%ecx
leal -995338651(%rbx,%r10,1),%ebx
movl 0(%rsi),%r10d
xorl %eax,%r11d
xorl %ecx,%r11d
addl %r11d,%ebx
roll $23,%ebx
movl %ecx,%r11d
addl %ecx,%ebx
movl 0(%rsi),%r10d
movl $0xffffffff,%r11d
xorl %edx,%r11d
leal -198630844(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 28(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal 1126891415(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 56(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1416354905(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 20(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -57434055(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 48(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1700485571(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 12(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1894986606(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 40(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1051523(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 4(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -2054922799(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 32(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal 1873313359(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 60(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -30611744(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 24(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal -1560198380(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 52(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal 1309151649(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 16(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
leal -145523070(%rax,%r10,1),%eax
orl %ebx,%r11d
xorl %ecx,%r11d
addl %r11d,%eax
movl 44(%rsi),%r10d
movl $0xffffffff,%r11d
roll $6,%eax
xorl %ecx,%r11d
addl %ebx,%eax
leal -1120210379(%rdx,%r10,1),%edx
orl %eax,%r11d
xorl %ebx,%r11d
addl %r11d,%edx
movl 8(%rsi),%r10d
movl $0xffffffff,%r11d
roll $10,%edx
xorl %ebx,%r11d
addl %eax,%edx
leal 718787259(%rcx,%r10,1),%ecx
orl %edx,%r11d
xorl %eax,%r11d
addl %r11d,%ecx
movl 36(%rsi),%r10d
movl $0xffffffff,%r11d
roll $15,%ecx
xorl %eax,%r11d
addl %edx,%ecx
leal -343485551(%rbx,%r10,1),%ebx
orl %ecx,%r11d
xorl %edx,%r11d
addl %r11d,%ebx
movl 0(%rsi),%r10d
movl $0xffffffff,%r11d
roll $21,%ebx
xorl %edx,%r11d
addl %ecx,%ebx
addl %r8d,%eax
addl %r9d,%ebx
addl %r14d,%ecx
addl %r15d,%edx
addq $64,%rsi
cmpq %rdi,%rsi
jb L$loop
L$end:
movl %eax,0(%rbp)
movl %ebx,4(%rbp)
movl %ecx,8(%rbp)
movl %edx,12(%rbp)
movq (%rsp),%r15
movq 8(%rsp),%r14
movq 16(%rsp),%r12
movq 24(%rsp),%rbx
movq 32(%rsp),%rbp
addq $40,%rsp
L$epilogue:
.byte 0xf3,0xc3
#endif
#endif // defined(__x86_64__) && defined(__APPLE__)
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif

View File

@ -0,0 +1,167 @@
/* ====================================================================
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <assert.h>
#include <string.h>
#include <CBigNumBoringSSL_type_check.h>
#include "internal.h"
void CRYPTO_cbc128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
block128_f block) {
size_t n;
const uint8_t *iv = ivec;
assert(key != NULL && ivec != NULL);
assert(len == 0 || (in != NULL && out != NULL));
while (len >= 16) {
for (n = 0; n < 16; n += sizeof(size_t)) {
store_word_le(out + n, load_word_le(in + n) ^ load_word_le(iv + n));
}
(*block)(out, out, key);
iv = out;
len -= 16;
in += 16;
out += 16;
}
while (len) {
for (n = 0; n < 16 && n < len; ++n) {
out[n] = in[n] ^ iv[n];
}
for (; n < 16; ++n) {
out[n] = iv[n];
}
(*block)(out, out, key);
iv = out;
if (len <= 16) {
break;
}
len -= 16;
in += 16;
out += 16;
}
OPENSSL_memcpy(ivec, iv, 16);
}
void CRYPTO_cbc128_decrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
block128_f block) {
size_t n;
union {
size_t t[16 / sizeof(size_t)];
uint8_t c[16];
} tmp;
assert(key != NULL && ivec != NULL);
assert(len == 0 || (in != NULL && out != NULL));
const uintptr_t inptr = (uintptr_t) in;
const uintptr_t outptr = (uintptr_t) out;
// If |in| and |out| alias, |in| must be ahead.
assert(inptr >= outptr || inptr + len <= outptr);
if ((inptr >= 32 && outptr <= inptr - 32) || inptr < outptr) {
// If |out| is at least two blocks behind |in| or completely disjoint, there
// is no need to decrypt to a temporary block.
OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
"block cannot be evenly divided into words");
const uint8_t *iv = ivec;
while (len >= 16) {
(*block)(in, out, key);
for (n = 0; n < 16; n += sizeof(size_t)) {
store_word_le(out + n, load_word_le(out + n) ^ load_word_le(iv + n));
}
iv = in;
len -= 16;
in += 16;
out += 16;
}
OPENSSL_memcpy(ivec, iv, 16);
} else {
OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
"block cannot be evenly divided into words");
while (len >= 16) {
(*block)(in, tmp.c, key);
for (n = 0; n < 16; n += sizeof(size_t)) {
size_t c = load_word_le(in + n);
store_word_le(out + n,
tmp.t[n / sizeof(size_t)] ^ load_word_le(ivec + n));
store_word_le(ivec + n, c);
}
len -= 16;
in += 16;
out += 16;
}
}
while (len) {
uint8_t c;
(*block)(in, tmp.c, key);
for (n = 0; n < 16 && n < len; ++n) {
c = in[n];
out[n] = tmp.c[n] ^ ivec[n];
ivec[n] = c;
}
if (len <= 16) {
for (; n < 16; ++n) {
ivec[n] = in[n];
}
break;
}
len -= 16;
in += 16;
out += 16;
}
}
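A minimal standalone sketch of the CBC chaining implemented by the two functions
above, using a toy 16-byte "block cipher" (byte-wise add of a constant) instead
of AES. All names and the toy cipher are illustrative only, not BoringSSL API.
It shows why encryption chains the previous ciphertext block into the next
plaintext block, and why in-place decryption must keep a copy of each ciphertext
block, mirroring the temporary-block path above.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Toy invertible "block cipher": add/subtract 42 to every byte. */
static void toy_encrypt(uint8_t b[16]) { for (int i = 0; i < 16; i++) b[i] += 42; }
static void toy_decrypt(uint8_t b[16]) { for (int i = 0; i < 16; i++) b[i] -= 42; }

/* CBC over whole blocks: c[i] = E(p[i] ^ c[i-1]), with c[-1] = iv. */
static void toy_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len, uint8_t iv[16]) {
  for (size_t off = 0; off < len; off += 16) {
    for (int i = 0; i < 16; i++) out[off + i] = in[off + i] ^ iv[i];
    toy_encrypt(out + off);
    memcpy(iv, out + off, 16);            /* next block chains off this ciphertext */
  }
}

/* p[i] = D(c[i]) ^ c[i-1]; keep a copy of c[i] so in-place decryption still
 * has the previous ciphertext available, like the tmp-block path above. */
static void toy_cbc_decrypt(const uint8_t *in, uint8_t *out, size_t len, uint8_t iv[16]) {
  uint8_t prev[16], cur[16];
  memcpy(prev, iv, 16);
  for (size_t off = 0; off < len; off += 16) {
    memcpy(cur, in + off, 16);
    memcpy(out + off, cur, 16);
    toy_decrypt(out + off);
    for (int i = 0; i < 16; i++) out[off + i] ^= prev[i];
    memcpy(prev, cur, 16);
  }
  memcpy(iv, prev, 16);
}

int main(void) {
  uint8_t iv1[16] = {0}, iv2[16] = {0};
  uint8_t msg[32] = "thirty-two bytes of plaintext..";
  uint8_t buf[32];
  memcpy(buf, msg, 32);
  toy_cbc_encrypt(buf, buf, 32, iv1);     /* in place */
  toy_cbc_decrypt(buf, buf, 32, iv2);     /* in place, relies on the saved copy */
  printf("%s\n", memcmp(buf, msg, 32) == 0 ? "round-trip ok" : "mismatch");
  return 0;
}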

View File

@ -0,0 +1,202 @@
/* ====================================================================
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <CBigNumBoringSSL_type_check.h>
#include <assert.h>
#include <string.h>
#include "internal.h"
OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
"block cannot be divided into size_t");
void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], unsigned *num,
int enc, block128_f block) {
assert(in && out && key && ivec && num);
unsigned n = *num;
if (enc) {
while (n && len) {
*(out++) = ivec[n] ^= *(in++);
--len;
n = (n + 1) % 16;
}
while (len >= 16) {
(*block)(ivec, ivec, key);
for (; n < 16; n += sizeof(size_t)) {
size_t tmp = load_word_le(ivec + n) ^ load_word_le(in + n);
store_word_le(ivec + n, tmp);
store_word_le(out + n, tmp);
}
len -= 16;
out += 16;
in += 16;
n = 0;
}
if (len) {
(*block)(ivec, ivec, key);
while (len--) {
out[n] = ivec[n] ^= in[n];
++n;
}
}
*num = n;
return;
} else {
while (n && len) {
uint8_t c;
*(out++) = ivec[n] ^ (c = *(in++));
ivec[n] = c;
--len;
n = (n + 1) % 16;
}
while (len >= 16) {
(*block)(ivec, ivec, key);
for (; n < 16; n += sizeof(size_t)) {
size_t t = load_word_le(in + n);
store_word_le(out + n, load_word_le(ivec + n) ^ t);
store_word_le(ivec + n, t);
}
len -= 16;
out += 16;
in += 16;
n = 0;
}
if (len) {
(*block)(ivec, ivec, key);
while (len--) {
uint8_t c;
out[n] = ivec[n] ^ (c = in[n]);
ivec[n] = c;
++n;
}
}
*num = n;
return;
}
}
/* This expects a single block of size nbits for both in and out. Note that
it corrupts any extra bits in the last byte of out */
static void cfbr_encrypt_block(const uint8_t *in, uint8_t *out, unsigned nbits,
const AES_KEY *key, uint8_t ivec[16], int enc,
block128_f block) {
int n, rem, num;
uint8_t ovec[16 * 2 + 1]; /* +1 because we dereference (but don't use) one
byte off the end */
if (nbits <= 0 || nbits > 128) {
return;
}
// fill in the first half of the new IV with the current IV
OPENSSL_memcpy(ovec, ivec, 16);
// construct the new IV
(*block)(ivec, ivec, key);
num = (nbits + 7) / 8;
if (enc) {
// encrypt the input
for (n = 0; n < num; ++n) {
out[n] = (ovec[16 + n] = in[n] ^ ivec[n]);
}
} else {
// decrypt the input
for (n = 0; n < num; ++n) {
out[n] = (ovec[16 + n] = in[n]) ^ ivec[n];
}
}
// shift ovec left...
rem = nbits % 8;
num = nbits / 8;
if (rem == 0) {
OPENSSL_memcpy(ivec, ovec + num, 16);
} else {
for (n = 0; n < 16; ++n) {
ivec[n] = ovec[n + num] << rem | ovec[n + num + 1] >> (8 - rem);
}
}
// it is not necessary to cleanse ovec, since the IV is not secret
}
// N.B. This expects the input to be packed, MS bit first
void CRYPTO_cfb128_1_encrypt(const uint8_t *in, uint8_t *out, size_t bits,
const AES_KEY *key, uint8_t ivec[16],
unsigned *num, int enc, block128_f block) {
size_t n;
uint8_t c[1], d[1];
assert(in && out && key && ivec && num);
assert(*num == 0);
for (n = 0; n < bits; ++n) {
c[0] = (in[n / 8] & (1 << (7 - n % 8))) ? 0x80 : 0;
cfbr_encrypt_block(c, d, 1, key, ivec, enc, block);
out[n / 8] = (out[n / 8] & ~(1 << (unsigned int)(7 - n % 8))) |
((d[0] & 0x80) >> (unsigned int)(n % 8));
}
}
void CRYPTO_cfb128_8_encrypt(const unsigned char *in, unsigned char *out,
size_t length, const AES_KEY *key,
unsigned char ivec[16], unsigned *num, int enc,
block128_f block) {
size_t n;
assert(in && out && key && ivec && num);
assert(*num == 0);
for (n = 0; n < length; ++n) {
cfbr_encrypt_block(&in[n], &out[n], 8, key, ivec, enc, block);
}
}
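A minimal standalone sketch of the whole-block CFB feedback used by
CRYPTO_cfb128_encrypt above, with a toy block function standing in for AES
(illustrative names, not BoringSSL API). The point it demonstrates is that both
directions run the block cipher forward over the previous ciphertext:
c[i] = p[i] XOR E(c[i-1]) and p[i] = c[i] XOR E(c[i-1]).

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Toy forward-only "block cipher": rotate-and-XOR each byte. */
static void toy_forward(const uint8_t in[16], uint8_t out[16]) {
  for (int i = 0; i < 16; i++)
    out[i] = (uint8_t)((in[i] << 3) | (in[i] >> 5)) ^ 0xA7;
}

/* Whole-block CFB; enc selects which side of the XOR feeds the shift register. */
static void toy_cfb(const uint8_t *in, uint8_t *out, size_t len,
                    uint8_t ivec[16], int enc) {
  uint8_t ks[16];
  for (size_t off = 0; off < len; off += 16) {
    toy_forward(ivec, ks);                        /* forward direction either way */
    for (int i = 0; i < 16; i++) out[off + i] = in[off + i] ^ ks[i];
    memcpy(ivec, enc ? out + off : in + off, 16); /* ciphertext becomes next IV */
  }
}

int main(void) {
  uint8_t iv1[16] = {1}, iv2[16] = {1};
  uint8_t msg[32] = "exactly thirty-two bytes here..";
  uint8_t ct[32], pt[32];
  toy_cfb(msg, ct, 32, iv1, 1);
  toy_cfb(ct, pt, 32, iv2, 0);
  printf("%s\n", memcmp(pt, msg, 32) == 0 ? "round-trip ok" : "mismatch");
  return 0;
}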

View File

@ -0,0 +1,200 @@
/* ====================================================================
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <CBigNumBoringSSL_type_check.h>
#include <assert.h>
#include <string.h>
#include "internal.h"
// NOTE: the IV/counter CTR mode is big-endian. The code itself
// is endian-neutral.
// increment counter (128-bit int) by 1
static void ctr128_inc(uint8_t *counter) {
uint32_t n = 16, c = 1;
do {
--n;
c += counter[n];
counter[n] = (uint8_t) c;
c >>= 8;
} while (n);
}
OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
"block cannot be divided into size_t");
// The input is encrypted as though 128-bit counter mode is being used. The extra
// state information to record how much of the 128-bit block we have used is
// contained in *num, and the encrypted counter is kept in ecount_buf. Both
// *num and ecount_buf must be initialised with zeros before the first call to
// CRYPTO_ctr128_encrypt().
//
// This algorithm assumes that the counter is in the x lower bits of the IV
// (ivec), and that the application has full control over overflow and the rest
// of the IV. This implementation takes NO responsibility for checking that
// the counter doesn't overflow into the rest of the IV when incremented.
void CRYPTO_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
uint8_t ecount_buf[16], unsigned int *num,
block128_f block) {
unsigned int n;
assert(key && ecount_buf && num);
assert(len == 0 || (in && out));
assert(*num < 16);
n = *num;
while (n && len) {
*(out++) = *(in++) ^ ecount_buf[n];
--len;
n = (n + 1) % 16;
}
while (len >= 16) {
(*block)(ivec, ecount_buf, key);
ctr128_inc(ivec);
for (n = 0; n < 16; n += sizeof(size_t)) {
store_word_le(out + n,
load_word_le(in + n) ^ load_word_le(ecount_buf + n));
}
len -= 16;
out += 16;
in += 16;
n = 0;
}
if (len) {
(*block)(ivec, ecount_buf, key);
ctr128_inc(ivec);
while (len--) {
out[n] = in[n] ^ ecount_buf[n];
++n;
}
}
*num = n;
}
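A standalone sketch of the resumable-state contract described in the comment
above CRYPTO_ctr128_encrypt: the partial-block position (*num) and the
encrypted-counter buffer start zeroed, carry over between calls, and make a
stream split across calls come out identical to a single call. The toy block
function and names are illustrative only, not BoringSSL's API.

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Big-endian 128-bit increment, same carry behaviour as ctr128_inc above. */
static void inc128(uint8_t c[16]) {
  for (int i = 15; i >= 0; i--) {
    if (++c[i] != 0) break;              /* stop once there is no carry */
  }
}

/* Toy "block cipher": keystream block = counter bytes XOR 0x5A. */
static void toy_block(const uint8_t in[16], uint8_t out[16]) {
  for (int i = 0; i < 16; i++) out[i] = in[i] ^ 0x5A;
}

/* Resumable CTR: *num and ecount must start zeroed, as the comment above says,
 * and are carried between calls so a split stream continues seamlessly. */
static void toy_ctr(const uint8_t *in, uint8_t *out, size_t len,
                    uint8_t ivec[16], uint8_t ecount[16], unsigned *num) {
  unsigned n = *num;
  for (size_t i = 0; i < len; i++) {
    if (n == 0) { toy_block(ivec, ecount); inc128(ivec); }
    out[i] = in[i] ^ ecount[n];
    n = (n + 1) % 16;
  }
  *num = n;
}

int main(void) {
  uint8_t msg[40] = "forty bytes of text, split across calls";
  uint8_t one[40], two[40];
  uint8_t iv1[16] = {0}, iv2[16] = {0}, ec1[16] = {0}, ec2[16] = {0};
  unsigned n1 = 0, n2 = 0;
  toy_ctr(msg, one, 40, iv1, ec1, &n1);          /* single call */
  toy_ctr(msg, two, 23, iv2, ec2, &n2);          /* same stream in two calls */
  toy_ctr(msg + 23, two + 23, 17, iv2, ec2, &n2);
  printf("%s\n", memcmp(one, two, 40) == 0 ? "streams match" : "mismatch");
  return 0;
}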
// increment upper 96 bits of 128-bit counter by 1
static void ctr96_inc(uint8_t *counter) {
uint32_t n = 12, c = 1;
do {
--n;
c += counter[n];
counter[n] = (uint8_t) c;
c >>= 8;
} while (n);
}
void CRYPTO_ctr128_encrypt_ctr32(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
uint8_t ecount_buf[16], unsigned int *num,
ctr128_f func) {
unsigned int n, ctr32;
assert(key && ecount_buf && num);
assert(len == 0 || (in && out));
assert(*num < 16);
n = *num;
while (n && len) {
*(out++) = *(in++) ^ ecount_buf[n];
--len;
n = (n + 1) % 16;
}
ctr32 = GETU32(ivec + 12);
while (len >= 16) {
size_t blocks = len / 16;
// 1<<28 is just a not-so-small yet not-so-large number...
// The condition below is practically never met, but it has to
// be checked for code correctness.
if (sizeof(size_t) > sizeof(unsigned int) && blocks > (1U << 28)) {
blocks = (1U << 28);
}
// As (*func) operates on a 32-bit counter, the caller
// has to handle overflow. The 'if' below detects the
// overflow, which is then handled by limiting the
// number of blocks to the exact overflow point...
ctr32 += (uint32_t)blocks;
if (ctr32 < blocks) {
blocks -= ctr32;
ctr32 = 0;
}
(*func)(in, out, blocks, key, ivec);
// (*func) does not update ivec, caller does:
PUTU32(ivec + 12, ctr32);
// ... overflow was detected, propagate the carry.
if (ctr32 == 0) {
ctr96_inc(ivec);
}
blocks *= 16;
len -= blocks;
out += blocks;
in += blocks;
}
if (len) {
OPENSSL_memset(ecount_buf, 0, 16);
(*func)(ecount_buf, ecount_buf, 1, key, ivec);
++ctr32;
PUTU32(ivec + 12, ctr32);
if (ctr32 == 0) {
ctr96_inc(ivec);
}
while (len--) {
out[n] = in[n] ^ ecount_buf[n];
++n;
}
}
*num = n;
}
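A worked example of the 32-bit counter overflow split in
CRYPTO_ctr128_encrypt_ctr32 above, using made-up numbers. It reproduces the
arithmetic of the 'ctr32 += blocks; if (ctr32 < blocks)' sequence to show how
many blocks get handed to (*func) before the wrap and the ctr96_inc carry.

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

int main(void) {
  uint32_t ctr32 = 0xfffffffd;   /* 3 counter values left before the wrap */
  size_t blocks = 10;            /* caller wants 10 blocks */
  ctr32 += (uint32_t)blocks;     /* wraps around to 7 */
  if (ctr32 < blocks) {          /* wrap detected: 7 < 10 */
    blocks -= ctr32;             /* process only 10 - 7 = 3 blocks now */
    ctr32 = 0;
  }
  /* (*func) would now get 3 blocks (counters 0xfffffffd..0xffffffff); since
   * ctr32 == 0 afterwards, ctr96_inc carries into the upper 96 bits and the
   * remaining 7 blocks are handled on the next loop iteration. */
  printf("process %zu blocks, then carry into the upper 96 bits\n", blocks);
  return 0;
}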

View File

@ -0,0 +1,729 @@
/* ====================================================================
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <CBigNumBoringSSL_base.h>
#include <assert.h>
#include <string.h>
#include <CBigNumBoringSSL_mem.h>
#include <CBigNumBoringSSL_cpu.h>
#include "internal.h"
#include "../../internal.h"
// kSizeTWithoutLower4Bits is a mask that can be used to zero the lower four
// bits of a |size_t|.
static const size_t kSizeTWithoutLower4Bits = (size_t) -16;
#define GCM_MUL(ctx, Xi) gcm_gmult_nohw((ctx)->Xi.u, (ctx)->gcm_key.Htable)
#define GHASH(ctx, in, len) \
gcm_ghash_nohw((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len)
// GHASH_CHUNK is "stride parameter" missioned to mitigate cache
// trashing effect. In other words idea is to hash data while it's
// still in L1 cache after encryption pass...
#define GHASH_CHUNK (3 * 1024)
#if defined(GHASH_ASM_X86_64) || defined(GHASH_ASM_X86)
static inline void gcm_reduce_1bit(u128 *V) {
if (sizeof(size_t) == 8) {
uint64_t T = UINT64_C(0xe100000000000000) & (0 - (V->hi & 1));
V->hi = (V->lo << 63) | (V->hi >> 1);
V->lo = (V->lo >> 1) ^ T;
} else {
uint32_t T = 0xe1000000U & (0 - (uint32_t)(V->hi & 1));
V->hi = (V->lo << 63) | (V->hi >> 1);
V->lo = (V->lo >> 1) ^ ((uint64_t)T << 32);
}
}
void gcm_init_ssse3(u128 Htable[16], const uint64_t H[2]) {
Htable[0].hi = 0;
Htable[0].lo = 0;
u128 V;
V.hi = H[1];
V.lo = H[0];
Htable[8] = V;
gcm_reduce_1bit(&V);
Htable[4] = V;
gcm_reduce_1bit(&V);
Htable[2] = V;
gcm_reduce_1bit(&V);
Htable[1] = V;
Htable[3].hi = V.hi ^ Htable[2].hi, Htable[3].lo = V.lo ^ Htable[2].lo;
V = Htable[4];
Htable[5].hi = V.hi ^ Htable[1].hi, Htable[5].lo = V.lo ^ Htable[1].lo;
Htable[6].hi = V.hi ^ Htable[2].hi, Htable[6].lo = V.lo ^ Htable[2].lo;
Htable[7].hi = V.hi ^ Htable[3].hi, Htable[7].lo = V.lo ^ Htable[3].lo;
V = Htable[8];
Htable[9].hi = V.hi ^ Htable[1].hi, Htable[9].lo = V.lo ^ Htable[1].lo;
Htable[10].hi = V.hi ^ Htable[2].hi, Htable[10].lo = V.lo ^ Htable[2].lo;
Htable[11].hi = V.hi ^ Htable[3].hi, Htable[11].lo = V.lo ^ Htable[3].lo;
Htable[12].hi = V.hi ^ Htable[4].hi, Htable[12].lo = V.lo ^ Htable[4].lo;
Htable[13].hi = V.hi ^ Htable[5].hi, Htable[13].lo = V.lo ^ Htable[5].lo;
Htable[14].hi = V.hi ^ Htable[6].hi, Htable[14].lo = V.lo ^ Htable[6].lo;
Htable[15].hi = V.hi ^ Htable[7].hi, Htable[15].lo = V.lo ^ Htable[7].lo;
// Treat |Htable| as a 16x16 byte table and transpose it. Thus, Htable[i]
// contains the i'th byte of j*H for all j.
uint8_t *Hbytes = (uint8_t *)Htable;
for (int i = 0; i < 16; i++) {
for (int j = 0; j < i; j++) {
uint8_t tmp = Hbytes[16*i + j];
Hbytes[16*i + j] = Hbytes[16*j + i];
Hbytes[16*j + i] = tmp;
}
}
}
#endif // GHASH_ASM_X86_64 || GHASH_ASM_X86
#ifdef GCM_FUNCREF
#undef GCM_MUL
#define GCM_MUL(ctx, Xi) (*gcm_gmult_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable)
#undef GHASH
#define GHASH(ctx, in, len) \
(*gcm_ghash_p)((ctx)->Xi.u, (ctx)->gcm_key.Htable, in, len)
#endif // GCM_FUNCREF
void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
u128 *out_key, u128 out_table[16], int *out_is_avx,
const uint8_t gcm_key[16]) {
*out_is_avx = 0;
union {
uint64_t u[2];
uint8_t c[16];
} H;
OPENSSL_memcpy(H.c, gcm_key, 16);
// H is stored in host byte order
H.u[0] = CRYPTO_bswap8(H.u[0]);
H.u[1] = CRYPTO_bswap8(H.u[1]);
OPENSSL_memcpy(out_key, H.c, 16);
#if defined(GHASH_ASM_X86_64)
if (crypto_gcm_clmul_enabled()) {
if (((OPENSSL_ia32cap_get()[1] >> 22) & 0x41) == 0x41) { // AVX+MOVBE
gcm_init_avx(out_table, H.u);
*out_mult = gcm_gmult_avx;
*out_hash = gcm_ghash_avx;
*out_is_avx = 1;
return;
}
gcm_init_clmul(out_table, H.u);
*out_mult = gcm_gmult_clmul;
*out_hash = gcm_ghash_clmul;
return;
}
if (gcm_ssse3_capable()) {
gcm_init_ssse3(out_table, H.u);
*out_mult = gcm_gmult_ssse3;
*out_hash = gcm_ghash_ssse3;
return;
}
#elif defined(GHASH_ASM_X86)
if (crypto_gcm_clmul_enabled()) {
gcm_init_clmul(out_table, H.u);
*out_mult = gcm_gmult_clmul;
*out_hash = gcm_ghash_clmul;
return;
}
if (gcm_ssse3_capable()) {
gcm_init_ssse3(out_table, H.u);
*out_mult = gcm_gmult_ssse3;
*out_hash = gcm_ghash_ssse3;
return;
}
#elif defined(GHASH_ASM_ARM)
if (gcm_pmull_capable()) {
gcm_init_v8(out_table, H.u);
*out_mult = gcm_gmult_v8;
*out_hash = gcm_ghash_v8;
return;
}
if (gcm_neon_capable()) {
gcm_init_neon(out_table, H.u);
*out_mult = gcm_gmult_neon;
*out_hash = gcm_ghash_neon;
return;
}
#elif defined(GHASH_ASM_PPC64LE)
if (CRYPTO_is_PPC64LE_vcrypto_capable()) {
gcm_init_p8(out_table, H.u);
*out_mult = gcm_gmult_p8;
*out_hash = gcm_ghash_p8;
return;
}
#endif
gcm_init_nohw(out_table, H.u);
*out_mult = gcm_gmult_nohw;
*out_hash = gcm_ghash_nohw;
}
void CRYPTO_gcm128_init_key(GCM128_KEY *gcm_key, const AES_KEY *aes_key,
block128_f block, int block_is_hwaes) {
OPENSSL_memset(gcm_key, 0, sizeof(*gcm_key));
gcm_key->block = block;
uint8_t ghash_key[16];
OPENSSL_memset(ghash_key, 0, sizeof(ghash_key));
(*block)(ghash_key, ghash_key, aes_key);
int is_avx;
CRYPTO_ghash_init(&gcm_key->gmult, &gcm_key->ghash, &gcm_key->H,
gcm_key->Htable, &is_avx, ghash_key);
gcm_key->use_aesni_gcm_crypt = (is_avx && block_is_hwaes) ? 1 : 0;
}
void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const AES_KEY *key,
const uint8_t *iv, size_t len) {
#ifdef GCM_FUNCREF
void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
ctx->gcm_key.gmult;
#endif
ctx->Yi.u[0] = 0;
ctx->Yi.u[1] = 0;
ctx->Xi.u[0] = 0;
ctx->Xi.u[1] = 0;
ctx->len.u[0] = 0; // AAD length
ctx->len.u[1] = 0; // message length
ctx->ares = 0;
ctx->mres = 0;
uint32_t ctr;
if (len == 12) {
OPENSSL_memcpy(ctx->Yi.c, iv, 12);
ctx->Yi.c[15] = 1;
ctr = 1;
} else {
uint64_t len0 = len;
while (len >= 16) {
for (size_t i = 0; i < 16; ++i) {
ctx->Yi.c[i] ^= iv[i];
}
GCM_MUL(ctx, Yi);
iv += 16;
len -= 16;
}
if (len) {
for (size_t i = 0; i < len; ++i) {
ctx->Yi.c[i] ^= iv[i];
}
GCM_MUL(ctx, Yi);
}
len0 <<= 3;
ctx->Yi.u[1] ^= CRYPTO_bswap8(len0);
GCM_MUL(ctx, Yi);
ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
}
(*ctx->gcm_key.block)(ctx->Yi.c, ctx->EK0.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
}
int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad, size_t len) {
#ifdef GCM_FUNCREF
void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
ctx->gcm_key.gmult;
void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len) = ctx->gcm_key.ghash;
#endif
if (ctx->len.u[1]) {
return 0;
}
uint64_t alen = ctx->len.u[0] + len;
if (alen > (UINT64_C(1) << 61) || (sizeof(len) == 8 && alen < len)) {
return 0;
}
ctx->len.u[0] = alen;
unsigned n = ctx->ares;
if (n) {
while (n && len) {
ctx->Xi.c[n] ^= *(aad++);
--len;
n = (n + 1) % 16;
}
if (n == 0) {
GCM_MUL(ctx, Xi);
} else {
ctx->ares = n;
return 1;
}
}
// Process a whole number of blocks.
size_t len_blocks = len & kSizeTWithoutLower4Bits;
if (len_blocks != 0) {
GHASH(ctx, aad, len_blocks);
aad += len_blocks;
len -= len_blocks;
}
// Process the remainder.
if (len != 0) {
n = (unsigned int)len;
for (size_t i = 0; i < len; ++i) {
ctx->Xi.c[i] ^= aad[i];
}
}
ctx->ares = n;
return 1;
}
int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
const uint8_t *in, uint8_t *out, size_t len) {
block128_f block = ctx->gcm_key.block;
#ifdef GCM_FUNCREF
void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
ctx->gcm_key.gmult;
void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len) = ctx->gcm_key.ghash;
#endif
uint64_t mlen = ctx->len.u[1] + len;
if (mlen > ((UINT64_C(1) << 36) - 32) ||
(sizeof(len) == 8 && mlen < len)) {
return 0;
}
ctx->len.u[1] = mlen;
if (ctx->ares) {
// First call to encrypt finalizes GHASH(AAD)
GCM_MUL(ctx, Xi);
ctx->ares = 0;
}
unsigned n = ctx->mres;
if (n) {
while (n && len) {
ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
--len;
n = (n + 1) % 16;
}
if (n == 0) {
GCM_MUL(ctx, Xi);
} else {
ctx->mres = n;
return 1;
}
}
uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
while (len >= GHASH_CHUNK) {
size_t j = GHASH_CHUNK;
while (j) {
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
for (size_t i = 0; i < 16; i += sizeof(size_t)) {
store_word_le(out + i,
load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
}
out += 16;
in += 16;
j -= 16;
}
GHASH(ctx, out - GHASH_CHUNK, GHASH_CHUNK);
len -= GHASH_CHUNK;
}
size_t len_blocks = len & kSizeTWithoutLower4Bits;
if (len_blocks != 0) {
while (len >= 16) {
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
for (size_t i = 0; i < 16; i += sizeof(size_t)) {
store_word_le(out + i,
load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
}
out += 16;
in += 16;
len -= 16;
}
GHASH(ctx, out - len_blocks, len_blocks);
}
if (len) {
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
while (len--) {
ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
++n;
}
}
ctx->mres = n;
return 1;
}
int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx, const AES_KEY *key,
const unsigned char *in, unsigned char *out,
size_t len) {
block128_f block = ctx->gcm_key.block;
#ifdef GCM_FUNCREF
void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
ctx->gcm_key.gmult;
void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len) = ctx->gcm_key.ghash;
#endif
uint64_t mlen = ctx->len.u[1] + len;
if (mlen > ((UINT64_C(1) << 36) - 32) ||
(sizeof(len) == 8 && mlen < len)) {
return 0;
}
ctx->len.u[1] = mlen;
if (ctx->ares) {
// First call to decrypt finalizes GHASH(AAD)
GCM_MUL(ctx, Xi);
ctx->ares = 0;
}
unsigned n = ctx->mres;
if (n) {
while (n && len) {
uint8_t c = *(in++);
*(out++) = c ^ ctx->EKi.c[n];
ctx->Xi.c[n] ^= c;
--len;
n = (n + 1) % 16;
}
if (n == 0) {
GCM_MUL(ctx, Xi);
} else {
ctx->mres = n;
return 1;
}
}
uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
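// Bulk path for decryption: GHASH is computed over the ciphertext, so each
// chunk is hashed before it is decrypted in counter mode.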
while (len >= GHASH_CHUNK) {
size_t j = GHASH_CHUNK;
GHASH(ctx, in, GHASH_CHUNK);
while (j) {
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
for (size_t i = 0; i < 16; i += sizeof(size_t)) {
store_word_le(out + i,
load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
}
out += 16;
in += 16;
j -= 16;
}
len -= GHASH_CHUNK;
}
size_t len_blocks = len & kSizeTWithoutLower4Bits;
if (len_blocks != 0) {
GHASH(ctx, in, len_blocks);
while (len >= 16) {
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
for (size_t i = 0; i < 16; i += sizeof(size_t)) {
store_word_le(out + i,
load_word_le(in + i) ^ ctx->EKi.t[i / sizeof(size_t)]);
}
out += 16;
in += 16;
len -= 16;
}
}
if (len) {
(*block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
while (len--) {
uint8_t c = in[n];
ctx->Xi.c[n] ^= c;
out[n] = c ^ ctx->EKi.c[n];
++n;
}
}
ctx->mres = n;
return 1;
}
int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx, const AES_KEY *key,
const uint8_t *in, uint8_t *out, size_t len,
ctr128_f stream) {
#ifdef GCM_FUNCREF
void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
ctx->gcm_key.gmult;
void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len) = ctx->gcm_key.ghash;
#endif
uint64_t mlen = ctx->len.u[1] + len;
if (mlen > ((UINT64_C(1) << 36) - 32) ||
(sizeof(len) == 8 && mlen < len)) {
return 0;
}
ctx->len.u[1] = mlen;
if (ctx->ares) {
// First call to encrypt finalizes GHASH(AAD)
GCM_MUL(ctx, Xi);
ctx->ares = 0;
}
unsigned n = ctx->mres;
if (n) {
while (n && len) {
ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
--len;
n = (n + 1) % 16;
}
if (n == 0) {
GCM_MUL(ctx, Xi);
} else {
ctx->mres = n;
return 1;
}
}
#if defined(AESNI_GCM)
// Check |len| to work around a C language bug. See https://crbug.com/1019588.
if (ctx->gcm_key.use_aesni_gcm_crypt && len > 0) {
// |aesni_gcm_encrypt| may not process all the input given to it. It may
// not process *any* of its input if it is deemed too small.
size_t bulk = aesni_gcm_encrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
in += bulk;
out += bulk;
len -= bulk;
}
#endif
uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
while (len >= GHASH_CHUNK) {
(*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
ctr += GHASH_CHUNK / 16;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
GHASH(ctx, out, GHASH_CHUNK);
out += GHASH_CHUNK;
in += GHASH_CHUNK;
len -= GHASH_CHUNK;
}
size_t len_blocks = len & kSizeTWithoutLower4Bits;
if (len_blocks != 0) {
size_t j = len_blocks / 16;
(*stream)(in, out, j, key, ctx->Yi.c);
ctr += (unsigned int)j;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
in += len_blocks;
len -= len_blocks;
GHASH(ctx, out, len_blocks);
out += len_blocks;
}
if (len) {
(*ctx->gcm_key.block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
while (len--) {
ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
++n;
}
}
ctx->mres = n;
return 1;
}
int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx, const AES_KEY *key,
const uint8_t *in, uint8_t *out, size_t len,
ctr128_f stream) {
#ifdef GCM_FUNCREF
void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
ctx->gcm_key.gmult;
void (*gcm_ghash_p)(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len) = ctx->gcm_key.ghash;
#endif
uint64_t mlen = ctx->len.u[1] + len;
if (mlen > ((UINT64_C(1) << 36) - 32) ||
(sizeof(len) == 8 && mlen < len)) {
return 0;
}
ctx->len.u[1] = mlen;
if (ctx->ares) {
// First call to decrypt finalizes GHASH(AAD)
GCM_MUL(ctx, Xi);
ctx->ares = 0;
}
unsigned n = ctx->mres;
if (n) {
while (n && len) {
uint8_t c = *(in++);
*(out++) = c ^ ctx->EKi.c[n];
ctx->Xi.c[n] ^= c;
--len;
n = (n + 1) % 16;
}
if (n == 0) {
GCM_MUL(ctx, Xi);
} else {
ctx->mres = n;
return 1;
}
}
#if defined(AESNI_GCM)
// Check |len| to work around a C language bug. See https://crbug.com/1019588.
if (ctx->gcm_key.use_aesni_gcm_crypt && len > 0) {
// |aesni_gcm_decrypt| may not process all the input given to it. It may
// not process *any* of its input if it is deemed too small.
size_t bulk = aesni_gcm_decrypt(in, out, len, key, ctx->Yi.c, ctx->Xi.u);
in += bulk;
out += bulk;
len -= bulk;
}
#endif
uint32_t ctr = CRYPTO_bswap4(ctx->Yi.d[3]);
while (len >= GHASH_CHUNK) {
GHASH(ctx, in, GHASH_CHUNK);
(*stream)(in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
ctr += GHASH_CHUNK / 16;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
out += GHASH_CHUNK;
in += GHASH_CHUNK;
len -= GHASH_CHUNK;
}
size_t len_blocks = len & kSizeTWithoutLower4Bits;
if (len_blocks != 0) {
size_t j = len_blocks / 16;
GHASH(ctx, in, len_blocks);
(*stream)(in, out, j, key, ctx->Yi.c);
ctr += (unsigned int)j;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
out += len_blocks;
in += len_blocks;
len -= len_blocks;
}
if (len) {
(*ctx->gcm_key.block)(ctx->Yi.c, ctx->EKi.c, key);
++ctr;
ctx->Yi.d[3] = CRYPTO_bswap4(ctr);
while (len--) {
uint8_t c = in[n];
ctx->Xi.c[n] ^= c;
out[n] = c ^ ctx->EKi.c[n];
++n;
}
}
ctx->mres = n;
return 1;
}
int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag, size_t len) {
#ifdef GCM_FUNCREF
void (*gcm_gmult_p)(uint64_t Xi[2], const u128 Htable[16]) =
ctx->gcm_key.gmult;
#endif
if (ctx->mres || ctx->ares) {
GCM_MUL(ctx, Xi);
}
ctx->Xi.u[0] ^= CRYPTO_bswap8(ctx->len.u[0] << 3);
ctx->Xi.u[1] ^= CRYPTO_bswap8(ctx->len.u[1] << 3);
GCM_MUL(ctx, Xi);
ctx->Xi.u[0] ^= ctx->EK0.u[0];
ctx->Xi.u[1] ^= ctx->EK0.u[1];
if (tag && len <= sizeof(ctx->Xi)) {
return CRYPTO_memcmp(ctx->Xi.c, tag, len) == 0;
} else {
return 0;
}
}
void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, unsigned char *tag, size_t len) {
CRYPTO_gcm128_finish(ctx, NULL, 0);
OPENSSL_memcpy(tag, ctx->Xi.c,
len <= sizeof(ctx->Xi.c) ? len : sizeof(ctx->Xi.c));
}
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
int crypto_gcm_clmul_enabled(void) {
#if defined(GHASH_ASM_X86) || defined(GHASH_ASM_X86_64)
const uint32_t *ia32cap = OPENSSL_ia32cap_get();
return (ia32cap[0] & (1 << 24)) && // check FXSR bit
(ia32cap[1] & (1 << 1)); // check PCLMULQDQ bit
#else
return 0;
#endif
}
#endif
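The routines above compose into a complete GCM seal (encrypt-and-tag) operation. The sketch below is illustrative only and is not part of the vendored BoringSSL sources: the helper name example_gcm_seal is hypothetical, and it assumes the vendored AES_set_encrypt_key/AES_encrypt from CBigNumBoringSSL_aes.h as the generic block128_f implementation.

#include <CBigNumBoringSSL_aes.h>
#include "internal.h"
// Illustrative sketch: AES-128-GCM seal of |pt_len| bytes with a 12-byte IV,
// producing ciphertext in |ct| and a 16-byte tag. Returns 1 on success.
static int example_gcm_seal(const uint8_t key_bytes[16], const uint8_t iv[12],
                            const uint8_t *aad, size_t aad_len,
                            const uint8_t *pt, uint8_t *ct, size_t pt_len,
                            uint8_t tag[16]) {
  AES_KEY aes;
  if (AES_set_encrypt_key(key_bytes, 128, &aes) != 0) {
    return 0;
  }
  GCM128_CONTEXT gcm;
  OPENSSL_memset(&gcm, 0, sizeof(gcm));
  CRYPTO_gcm128_init_key(&gcm.gcm_key, &aes, AES_encrypt, /*block_is_hwaes=*/0);
  CRYPTO_gcm128_setiv(&gcm, &aes, iv, 12);
  if (!CRYPTO_gcm128_aad(&gcm, aad, aad_len) ||
      !CRYPTO_gcm128_encrypt(&gcm, &aes, pt, ct, pt_len)) {
    return 0;
  }
  CRYPTO_gcm128_tag(&gcm, tag, 16);
  return 1;
}

The matching open operation would call CRYPTO_gcm128_decrypt and then verify the tag with CRYPTO_gcm128_finish rather than copying it out.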

View File

@@ -0,0 +1,304 @@
/* Copyright (c) 2019, Google Inc.
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
* SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
#include <CBigNumBoringSSL_base.h>
#include "../../internal.h"
#include "internal.h"
#if !defined(BORINGSSL_HAS_UINT128) && defined(OPENSSL_SSE2)
#include <emmintrin.h>
#endif
// This file contains a constant-time implementation of GHASH based on the notes
// in https://bearssl.org/constanttime.html#ghash-for-gcm and the reduction
// algorithm described in
// https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf.
//
// Unlike the BearSSL notes, we use uint128_t in the 64-bit implementation. Our
// primary compilers (clang, clang-cl, and gcc) all support it. MSVC will run
// the 32-bit implementation, but we can use its intrinsics if necessary.
#if defined(BORINGSSL_HAS_UINT128)
static void gcm_mul64_nohw(uint64_t *out_lo, uint64_t *out_hi, uint64_t a,
uint64_t b) {
// One term every four bits means the largest term is 64/4 = 16, which barely
// overflows into the next term. Using one term every five bits would cost 25
// multiplications instead of 16. It is faster to mask off the bottom four
// bits of |a|, giving a largest term of 60/4 = 15, and apply the bottom bits
// separately.
uint64_t a0 = a & UINT64_C(0x1111111111111110);
uint64_t a1 = a & UINT64_C(0x2222222222222220);
uint64_t a2 = a & UINT64_C(0x4444444444444440);
uint64_t a3 = a & UINT64_C(0x8888888888888880);
uint64_t b0 = b & UINT64_C(0x1111111111111111);
uint64_t b1 = b & UINT64_C(0x2222222222222222);
uint64_t b2 = b & UINT64_C(0x4444444444444444);
uint64_t b3 = b & UINT64_C(0x8888888888888888);
uint128_t c0 = (a0 * (uint128_t)b0) ^ (a1 * (uint128_t)b3) ^
(a2 * (uint128_t)b2) ^ (a3 * (uint128_t)b1);
uint128_t c1 = (a0 * (uint128_t)b1) ^ (a1 * (uint128_t)b0) ^
(a2 * (uint128_t)b3) ^ (a3 * (uint128_t)b2);
uint128_t c2 = (a0 * (uint128_t)b2) ^ (a1 * (uint128_t)b1) ^
(a2 * (uint128_t)b0) ^ (a3 * (uint128_t)b3);
uint128_t c3 = (a0 * (uint128_t)b3) ^ (a1 * (uint128_t)b2) ^
(a2 * (uint128_t)b1) ^ (a3 * (uint128_t)b0);
// Multiply the bottom four bits of |a| with |b|.
uint64_t a0_mask = UINT64_C(0) - (a & 1);
uint64_t a1_mask = UINT64_C(0) - ((a >> 1) & 1);
uint64_t a2_mask = UINT64_C(0) - ((a >> 2) & 1);
uint64_t a3_mask = UINT64_C(0) - ((a >> 3) & 1);
uint128_t extra = (a0_mask & b) ^ ((uint128_t)(a1_mask & b) << 1) ^
((uint128_t)(a2_mask & b) << 2) ^
((uint128_t)(a3_mask & b) << 3);
*out_lo = (((uint64_t)c0) & UINT64_C(0x1111111111111111)) ^
(((uint64_t)c1) & UINT64_C(0x2222222222222222)) ^
(((uint64_t)c2) & UINT64_C(0x4444444444444444)) ^
(((uint64_t)c3) & UINT64_C(0x8888888888888888)) ^ ((uint64_t)extra);
*out_hi = (((uint64_t)(c0 >> 64)) & UINT64_C(0x1111111111111111)) ^
(((uint64_t)(c1 >> 64)) & UINT64_C(0x2222222222222222)) ^
(((uint64_t)(c2 >> 64)) & UINT64_C(0x4444444444444444)) ^
(((uint64_t)(c3 >> 64)) & UINT64_C(0x8888888888888888)) ^
((uint64_t)(extra >> 64));
}
#elif defined(OPENSSL_SSE2)
static __m128i gcm_mul32_nohw(uint32_t a, uint32_t b) {
// One term every four bits means the largest term is 32/4 = 8, which does not
// overflow into the next term.
__m128i aa = _mm_setr_epi32(a, 0, a, 0);
__m128i bb = _mm_setr_epi32(b, 0, b, 0);
__m128i a0a0 =
_mm_and_si128(aa, _mm_setr_epi32(0x11111111, 0, 0x11111111, 0));
__m128i a2a2 =
_mm_and_si128(aa, _mm_setr_epi32(0x44444444, 0, 0x44444444, 0));
__m128i b0b1 =
_mm_and_si128(bb, _mm_setr_epi32(0x11111111, 0, 0x22222222, 0));
__m128i b2b3 =
_mm_and_si128(bb, _mm_setr_epi32(0x44444444, 0, 0x88888888, 0));
__m128i c0c1 =
_mm_xor_si128(_mm_mul_epu32(a0a0, b0b1), _mm_mul_epu32(a2a2, b2b3));
__m128i c2c3 =
_mm_xor_si128(_mm_mul_epu32(a2a2, b0b1), _mm_mul_epu32(a0a0, b2b3));
__m128i a1a1 =
_mm_and_si128(aa, _mm_setr_epi32(0x22222222, 0, 0x22222222, 0));
__m128i a3a3 =
_mm_and_si128(aa, _mm_setr_epi32(0x88888888, 0, 0x88888888, 0));
__m128i b3b0 =
_mm_and_si128(bb, _mm_setr_epi32(0x88888888, 0, 0x11111111, 0));
__m128i b1b2 =
_mm_and_si128(bb, _mm_setr_epi32(0x22222222, 0, 0x44444444, 0));
c0c1 = _mm_xor_si128(c0c1, _mm_mul_epu32(a1a1, b3b0));
c0c1 = _mm_xor_si128(c0c1, _mm_mul_epu32(a3a3, b1b2));
c2c3 = _mm_xor_si128(c2c3, _mm_mul_epu32(a3a3, b3b0));
c2c3 = _mm_xor_si128(c2c3, _mm_mul_epu32(a1a1, b1b2));
c0c1 = _mm_and_si128(
c0c1, _mm_setr_epi32(0x11111111, 0x11111111, 0x22222222, 0x22222222));
c2c3 = _mm_and_si128(
c2c3, _mm_setr_epi32(0x44444444, 0x44444444, 0x88888888, 0x88888888));
c0c1 = _mm_xor_si128(c0c1, c2c3);
// c0 ^= c1
c0c1 = _mm_xor_si128(c0c1, _mm_srli_si128(c0c1, 8));
return c0c1;
}
static void gcm_mul64_nohw(uint64_t *out_lo, uint64_t *out_hi, uint64_t a,
uint64_t b) {
uint32_t a0 = a & 0xffffffff;
uint32_t a1 = a >> 32;
uint32_t b0 = b & 0xffffffff;
uint32_t b1 = b >> 32;
// Karatsuba multiplication.
__m128i lo = gcm_mul32_nohw(a0, b0);
__m128i hi = gcm_mul32_nohw(a1, b1);
__m128i mid = gcm_mul32_nohw(a0 ^ a1, b0 ^ b1);
mid = _mm_xor_si128(mid, lo);
mid = _mm_xor_si128(mid, hi);
__m128i ret = _mm_unpacklo_epi64(lo, hi);
mid = _mm_slli_si128(mid, 4);
mid = _mm_and_si128(mid, _mm_setr_epi32(0, 0xffffffff, 0xffffffff, 0));
ret = _mm_xor_si128(ret, mid);
memcpy(out_lo, &ret, 8);
memcpy(out_hi, ((char*)&ret) + 8, 8);
}
#else // !BORINGSSL_HAS_UINT128 && !OPENSSL_SSE2
static uint64_t gcm_mul32_nohw(uint32_t a, uint32_t b) {
// One term every four bits means the largest term is 32/4 = 8, which does not
// overflow into the next term.
uint32_t a0 = a & 0x11111111;
uint32_t a1 = a & 0x22222222;
uint32_t a2 = a & 0x44444444;
uint32_t a3 = a & 0x88888888;
uint32_t b0 = b & 0x11111111;
uint32_t b1 = b & 0x22222222;
uint32_t b2 = b & 0x44444444;
uint32_t b3 = b & 0x88888888;
uint64_t c0 = (a0 * (uint64_t)b0) ^ (a1 * (uint64_t)b3) ^
(a2 * (uint64_t)b2) ^ (a3 * (uint64_t)b1);
uint64_t c1 = (a0 * (uint64_t)b1) ^ (a1 * (uint64_t)b0) ^
(a2 * (uint64_t)b3) ^ (a3 * (uint64_t)b2);
uint64_t c2 = (a0 * (uint64_t)b2) ^ (a1 * (uint64_t)b1) ^
(a2 * (uint64_t)b0) ^ (a3 * (uint64_t)b3);
uint64_t c3 = (a0 * (uint64_t)b3) ^ (a1 * (uint64_t)b2) ^
(a2 * (uint64_t)b1) ^ (a3 * (uint64_t)b0);
return (c0 & UINT64_C(0x1111111111111111)) |
(c1 & UINT64_C(0x2222222222222222)) |
(c2 & UINT64_C(0x4444444444444444)) |
(c3 & UINT64_C(0x8888888888888888));
}
static void gcm_mul64_nohw(uint64_t *out_lo, uint64_t *out_hi, uint64_t a,
uint64_t b) {
uint32_t a0 = a & 0xffffffff;
uint32_t a1 = a >> 32;
uint32_t b0 = b & 0xffffffff;
uint32_t b1 = b >> 32;
// Karatsuba multiplication.
uint64_t lo = gcm_mul32_nohw(a0, b0);
uint64_t hi = gcm_mul32_nohw(a1, b1);
uint64_t mid = gcm_mul32_nohw(a0 ^ a1, b0 ^ b1) ^ lo ^ hi;
*out_lo = lo ^ (mid << 32);
*out_hi = hi ^ (mid >> 32);
}
#endif // BORINGSSL_HAS_UINT128
void gcm_init_nohw(u128 Htable[16], const uint64_t Xi[2]) {
// We implement GHASH in terms of POLYVAL, as described in RFC8452. This
// avoids a shift by 1 in the multiplication, needed to account for bit
// reversal losing a bit after multiplication, that is,
// rev128(X) * rev128(Y) = rev255(X*Y).
//
// Per Appendix A, we run mulX_POLYVAL. Note this is the same transformation
// applied by |gcm_init_clmul|, etc. Note |Xi| has already been byteswapped.
//
// See also slide 16 of
// https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf
Htable[0].lo = Xi[1];
Htable[0].hi = Xi[0];
uint64_t carry = Htable[0].hi >> 63;
carry = 0u - carry;
Htable[0].hi <<= 1;
Htable[0].hi |= Htable[0].lo >> 63;
Htable[0].lo <<= 1;
// The irreducible polynomial is 1 + x^121 + x^126 + x^127 + x^128, so we
// conditionally add 0xc200...0001.
Htable[0].lo ^= carry & 1;
Htable[0].hi ^= carry & UINT64_C(0xc200000000000000);
// This implementation does not use the rest of |Htable|.
}
static void gcm_polyval_nohw(uint64_t Xi[2], const u128 *H) {
// Karatsuba multiplication. The product of |Xi| and |H| is stored in |r0|
// through |r3|. Note there is no byte or bit reversal because we are
// evaluating POLYVAL.
uint64_t r0, r1;
gcm_mul64_nohw(&r0, &r1, Xi[0], H->lo);
uint64_t r2, r3;
gcm_mul64_nohw(&r2, &r3, Xi[1], H->hi);
uint64_t mid0, mid1;
gcm_mul64_nohw(&mid0, &mid1, Xi[0] ^ Xi[1], H->hi ^ H->lo);
mid0 ^= r0 ^ r2;
mid1 ^= r1 ^ r3;
r2 ^= mid1;
r1 ^= mid0;
// Now we multiply our 256-bit result by x^-128 and reduce. |r2| and
// |r3| shift into position and we must multiply |r0| and |r1| by x^-128. We
// have:
//
// 1 = x^121 + x^126 + x^127 + x^128
// x^-128 = x^-7 + x^-2 + x^-1 + 1
//
// This is the GHASH reduction step, but with bits flowing in reverse.
// The x^-7, x^-2, and x^-1 terms shift bits past x^0, which would require
// another reduction step. Instead, we gather the excess bits, incorporate
// them into |r0| and |r1| and reduce once. See slides 17-19
// of https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf.
r1 ^= (r0 << 63) ^ (r0 << 62) ^ (r0 << 57);
// 1
r2 ^= r0;
r3 ^= r1;
// x^-1
r2 ^= r0 >> 1;
r2 ^= r1 << 63;
r3 ^= r1 >> 1;
// x^-2
r2 ^= r0 >> 2;
r2 ^= r1 << 62;
r3 ^= r1 >> 2;
// x^-7
r2 ^= r0 >> 7;
r2 ^= r1 << 57;
r3 ^= r1 >> 7;
Xi[0] = r2;
Xi[1] = r3;
}
void gcm_gmult_nohw(uint64_t Xi[2], const u128 Htable[16]) {
uint64_t swapped[2];
swapped[0] = CRYPTO_bswap8(Xi[1]);
swapped[1] = CRYPTO_bswap8(Xi[0]);
gcm_polyval_nohw(swapped, &Htable[0]);
Xi[0] = CRYPTO_bswap8(swapped[1]);
Xi[1] = CRYPTO_bswap8(swapped[0]);
}
void gcm_ghash_nohw(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len) {
uint64_t swapped[2];
swapped[0] = CRYPTO_bswap8(Xi[1]);
swapped[1] = CRYPTO_bswap8(Xi[0]);
while (len >= 16) {
uint64_t block[2];
OPENSSL_memcpy(block, inp, 16);
swapped[0] ^= CRYPTO_bswap8(block[1]);
swapped[1] ^= CRYPTO_bswap8(block[0]);
gcm_polyval_nohw(swapped, &Htable[0]);
inp += 16;
len -= 16;
}
Xi[0] = CRYPTO_bswap8(swapped[1]);
Xi[1] = CRYPTO_bswap8(swapped[0]);
}
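As a minimal sketch of how the portable routines above are driven directly (illustrative only, not part of the vendored sources; the helper name is hypothetical): the hash subkey is loaded as two byte-swapped 64-bit halves, matching the note on gcm_init_nohw that |Xi| has already been byteswapped, and one 16-byte block is folded into a zero GHASH state. It relies on the same headers that gcm_nohw.c already includes.

static void example_ghash_one_block(uint8_t out[16], const uint8_t subkey[16],
                                    const uint8_t block[16]) {
  // Load the subkey as two 64-bit halves in the byte order gcm_init_nohw expects.
  uint64_t H[2];
  OPENSSL_memcpy(H, subkey, 16);
  H[0] = CRYPTO_bswap8(H[0]);
  H[1] = CRYPTO_bswap8(H[1]);
  u128 Htable[16];
  gcm_init_nohw(Htable, H);
  uint64_t Xi[2] = {0, 0};  // GHASH state starts at zero.
  gcm_ghash_nohw(Xi, Htable, block, 16);
  OPENSSL_memcpy(out, Xi, 16);
}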

View File

@@ -0,0 +1,441 @@
/* ====================================================================
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#ifndef OPENSSL_HEADER_MODES_INTERNAL_H
#define OPENSSL_HEADER_MODES_INTERNAL_H
#include <CBigNumBoringSSL_base.h>
#include <CBigNumBoringSSL_aes.h>
#include <CBigNumBoringSSL_cpu.h>
#include <stdlib.h>
#include <string.h>
#include "../../internal.h"
#if defined(__cplusplus)
extern "C" {
#endif
static inline uint32_t GETU32(const void *in) {
uint32_t v;
OPENSSL_memcpy(&v, in, sizeof(v));
return CRYPTO_bswap4(v);
}
static inline void PUTU32(void *out, uint32_t v) {
v = CRYPTO_bswap4(v);
OPENSSL_memcpy(out, &v, sizeof(v));
}
static inline size_t load_word_le(const void *in) {
size_t v;
OPENSSL_memcpy(&v, in, sizeof(v));
return v;
}
static inline void store_word_le(void *out, size_t v) {
OPENSSL_memcpy(out, &v, sizeof(v));
}
// block128_f is the type of an AES block cipher implementation.
//
// Unlike upstream OpenSSL, it and the other functions in this file hard-code
// |AES_KEY|. It is undefined in C to call a function pointer with anything
// other than the original type. Thus we either must match |block128_f| to the
// type signature of |AES_encrypt| and friends or pass in |void*| wrapper
// functions.
//
// These functions are called exclusively with AES, so we use the former.
typedef void (*block128_f)(const uint8_t in[16], uint8_t out[16],
const AES_KEY *key);
// CTR.
// ctr128_f is the type of a function that performs CTR-mode encryption.
typedef void (*ctr128_f)(const uint8_t *in, uint8_t *out, size_t blocks,
const AES_KEY *key, const uint8_t ivec[16]);
// CRYPTO_ctr128_encrypt encrypts (or decrypts, it's the same in CTR mode)
// |len| bytes from |in| to |out| using |block| in counter mode. There's no
// requirement that |len| be a multiple of any value and any partial blocks are
// stored in |ecount_buf| and |*num|, which must be zeroed before the initial
// call. The counter is a 128-bit, big-endian value in |ivec| and is
// incremented by this function.
void CRYPTO_ctr128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
uint8_t ecount_buf[16], unsigned *num,
block128_f block);
// CRYPTO_ctr128_encrypt_ctr32 acts like |CRYPTO_ctr128_encrypt| but takes
// |ctr|, a function that performs CTR mode but only deals with the lower 32
// bits of the counter. This is useful when |ctr| can be an optimised
// function.
void CRYPTO_ctr128_encrypt_ctr32(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
uint8_t ecount_buf[16], unsigned *num,
ctr128_f ctr);
// GCM.
//
// This API differs from the upstream API slightly. The |GCM128_CONTEXT| does
// not have a |key| pointer that points to the key as upstream's version does.
// Instead, every function takes a |key| parameter. This way |GCM128_CONTEXT|
// can be safely copied. Additionally, |gcm_key| is split into a separate
// struct.
typedef struct { uint64_t hi,lo; } u128;
// gmult_func multiplies |Xi| by the GCM key and writes the result back to
// |Xi|.
typedef void (*gmult_func)(uint64_t Xi[2], const u128 Htable[16]);
// ghash_func repeatedly multiplies |Xi| by the GCM key and adds in blocks from
// |inp|. The result is written back to |Xi| and the |len| argument must be a
// multiple of 16.
typedef void (*ghash_func)(uint64_t Xi[2], const u128 Htable[16],
const uint8_t *inp, size_t len);
typedef struct gcm128_key_st {
// Note the MOVBE-based, x86-64, GHASH assembly requires |H| and |Htable| to
// be the first two elements of this struct. Additionally, some assembly
// routines require a 16-byte-aligned |Htable| when hashing data, but not
// initialization. |GCM128_KEY| is not itself aligned to simplify embedding in
// |EVP_AEAD_CTX|, but |Htable|'s offset must be a multiple of 16.
u128 H;
u128 Htable[16];
gmult_func gmult;
ghash_func ghash;
block128_f block;
// use_aesni_gcm_crypt is true if this context should use the assembly
// functions |aesni_gcm_encrypt| and |aesni_gcm_decrypt| to process data.
unsigned use_aesni_gcm_crypt:1;
} GCM128_KEY;
// GCM128_CONTEXT contains state for a single GCM operation. The structure
// should be zero-initialized before use.
typedef struct {
// The following 5 names follow the names in the GCM specification
union {
uint64_t u[2];
uint32_t d[4];
uint8_t c[16];
size_t t[16 / sizeof(size_t)];
} Yi, EKi, EK0, len, Xi;
// Note that the order of |Xi| and |gcm_key| is fixed by the MOVBE-based,
// x86-64, GHASH assembly. Additionally, some assembly routines require
// |gcm_key| to be 16-byte aligned. |GCM128_KEY| is not itself aligned to
// simplify embedding in |EVP_AEAD_CTX|.
alignas(16) GCM128_KEY gcm_key;
unsigned mres, ares;
} GCM128_CONTEXT;
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
// crypto_gcm_clmul_enabled returns one if the CLMUL implementation of GCM is
// used.
int crypto_gcm_clmul_enabled(void);
#endif
// CRYPTO_ghash_init writes a precomputed table of powers of |gcm_key| to
// |out_table| and sets |*out_mult| and |*out_hash| to (potentially hardware
// accelerated) functions for performing operations in the GHASH field. If the
// AVX implementation was used |*out_is_avx| will be true.
void CRYPTO_ghash_init(gmult_func *out_mult, ghash_func *out_hash,
u128 *out_key, u128 out_table[16], int *out_is_avx,
const uint8_t gcm_key[16]);
// CRYPTO_gcm128_init_key initialises |gcm_key| to use |block| (typically AES)
// with the given key. |block_is_hwaes| is one if |block| is |aes_hw_encrypt|.
OPENSSL_EXPORT void CRYPTO_gcm128_init_key(GCM128_KEY *gcm_key,
const AES_KEY *key, block128_f block,
int block_is_hwaes);
// CRYPTO_gcm128_setiv sets the IV (nonce) for |ctx|. The |key| must be the
// same key that was passed to |CRYPTO_gcm128_init|.
OPENSSL_EXPORT void CRYPTO_gcm128_setiv(GCM128_CONTEXT *ctx, const AES_KEY *key,
const uint8_t *iv, size_t iv_len);
// CRYPTO_gcm128_aad sets the authenticated data for an instance of GCM.
// This must be called before any data is encrypted. It returns one on success
// and zero otherwise.
OPENSSL_EXPORT int CRYPTO_gcm128_aad(GCM128_CONTEXT *ctx, const uint8_t *aad,
size_t len);
// CRYPTO_gcm128_encrypt encrypts |len| bytes from |in| to |out|. The |key|
// must be the same key that was passed to |CRYPTO_gcm128_init|. It returns one
// on success and zero otherwise.
OPENSSL_EXPORT int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
const AES_KEY *key, const uint8_t *in,
uint8_t *out, size_t len);
// CRYPTO_gcm128_decrypt decrypts |len| bytes from |in| to |out|. The |key|
// must be the same key that was passed to |CRYPTO_gcm128_init|. It returns one
// on success and zero otherwise.
OPENSSL_EXPORT int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
const AES_KEY *key, const uint8_t *in,
uint8_t *out, size_t len);
// CRYPTO_gcm128_encrypt_ctr32 encrypts |len| bytes from |in| to |out| using
// a CTR function that only handles the bottom 32 bits of the nonce, like
// |CRYPTO_ctr128_encrypt_ctr32|. The |key| must be the same key that was
// passed to |CRYPTO_gcm128_init|. It returns one on success and zero
// otherwise.
OPENSSL_EXPORT int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
const AES_KEY *key,
const uint8_t *in, uint8_t *out,
size_t len, ctr128_f stream);
// CRYPTO_gcm128_decrypt_ctr32 decrypts |len| bytes from |in| to |out| using
// a CTR function that only handles the bottom 32 bits of the nonce, like
// |CRYPTO_ctr128_encrypt_ctr32|. The |key| must be the same key that was
// passed to |CRYPTO_gcm128_init|. It returns one on success and zero
// otherwise.
OPENSSL_EXPORT int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
const AES_KEY *key,
const uint8_t *in, uint8_t *out,
size_t len, ctr128_f stream);
// CRYPTO_gcm128_finish calculates the authenticator and compares it against
// |len| bytes of |tag|. It returns one on success and zero otherwise.
OPENSSL_EXPORT int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const uint8_t *tag,
size_t len);
// CRYPTO_gcm128_tag calculates the authenticator and copies it into |tag|.
// The minimum of |len| and 16 bytes is copied into |tag|.
OPENSSL_EXPORT void CRYPTO_gcm128_tag(GCM128_CONTEXT *ctx, uint8_t *tag,
size_t len);
// GCM assembly.
void gcm_init_nohw(u128 Htable[16], const uint64_t H[2]);
void gcm_gmult_nohw(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_nohw(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len);
#if !defined(OPENSSL_NO_ASM)
#if defined(OPENSSL_X86) || defined(OPENSSL_X86_64)
#define GCM_FUNCREF
void gcm_init_clmul(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_clmul(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_clmul(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len);
OPENSSL_INLINE char gcm_ssse3_capable(void) {
return (OPENSSL_ia32cap_get()[1] & (1 << (41 - 32))) != 0;
}
// |gcm_gmult_ssse3| and |gcm_ghash_ssse3| require |Htable| to be
// 16-byte-aligned, but |gcm_init_ssse3| does not.
void gcm_init_ssse3(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_ssse3(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_ssse3(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
size_t len);
#if defined(OPENSSL_X86_64)
#define GHASH_ASM_X86_64
void gcm_init_avx(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_avx(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_avx(uint64_t Xi[2], const u128 Htable[16], const uint8_t *in,
size_t len);
#define AESNI_GCM
size_t aesni_gcm_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi);
size_t aesni_gcm_decrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], uint64_t *Xi);
#endif // OPENSSL_X86_64
#if defined(OPENSSL_X86)
#define GHASH_ASM_X86
#endif // OPENSSL_X86
#elif defined(OPENSSL_ARM) || defined(OPENSSL_AARCH64)
#define GHASH_ASM_ARM
#define GCM_FUNCREF
OPENSSL_INLINE int gcm_pmull_capable(void) {
return CRYPTO_is_ARMv8_PMULL_capable();
}
void gcm_init_v8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_v8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_v8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len);
OPENSSL_INLINE int gcm_neon_capable(void) { return CRYPTO_is_NEON_capable(); }
void gcm_init_neon(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_neon(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_neon(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len);
#elif defined(OPENSSL_PPC64LE)
#define GHASH_ASM_PPC64LE
#define GCM_FUNCREF
void gcm_init_p8(u128 Htable[16], const uint64_t Xi[2]);
void gcm_gmult_p8(uint64_t Xi[2], const u128 Htable[16]);
void gcm_ghash_p8(uint64_t Xi[2], const u128 Htable[16], const uint8_t *inp,
size_t len);
#endif
#endif // OPENSSL_NO_ASM
// CBC.
// cbc128_f is the type of a function that performs CBC-mode encryption.
typedef void (*cbc128_f)(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], int enc);
// CRYPTO_cbc128_encrypt encrypts |len| bytes from |in| to |out| using the
// given IV and block cipher in CBC mode. The input need not be a multiple of
// 128 bits long, but the output will round up to the nearest 128 bit multiple,
// zero padding the input if needed. The IV will be updated on return.
void CRYPTO_cbc128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
block128_f block);
// CRYPTO_cbc128_decrypt decrypts |len| bytes from |in| to |out| using the
// given IV and block cipher in CBC mode. If |len| is not a multiple of 128
// bits then only that many bytes will be written, but a multiple of 128 bits
// is always read from |in|. The IV will be updated on return.
void CRYPTO_cbc128_decrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
block128_f block);
// OFB.
// CRYPTO_ofb128_encrypt encrypts (or decrypts, it's the same with OFB mode)
// |len| bytes from |in| to |out| using |block| in OFB mode. There's no
// requirement that |len| be a multiple of any value and any partial blocks are
// stored in |ivec| and |*num|, the latter must be zero before the initial
// call.
void CRYPTO_ofb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], unsigned *num,
block128_f block);
// CFB.
// CRYPTO_cfb128_encrypt encrypts (or decrypts, if |enc| is zero) |len| bytes
// from |in| to |out| using |block| in CFB mode. There's no requirement that
// |len| be a multiple of any value and any partial blocks are stored in |ivec|
// and |*num|, the latter must be zero before the initial call.
void CRYPTO_cfb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], unsigned *num,
int enc, block128_f block);
// CRYPTO_cfb128_8_encrypt encrypts (or decrypts, if |enc| is zero) |len| bytes
// from |in| to |out| using |block| in CFB-8 mode. Prior to the first call
// |num| should be set to zero.
void CRYPTO_cfb128_8_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
unsigned *num, int enc, block128_f block);
// CRYPTO_cfb128_1_encrypt encrypts (or decrypts, if |enc| is zero) |len| bytes
// from |in| to |out| using |block| in CFB-1 mode. Prior to the first call
// |num| should be set to zero.
void CRYPTO_cfb128_1_encrypt(const uint8_t *in, uint8_t *out, size_t bits,
const AES_KEY *key, uint8_t ivec[16],
unsigned *num, int enc, block128_f block);
size_t CRYPTO_cts128_encrypt_block(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16],
block128_f block);
// POLYVAL.
//
// POLYVAL is a polynomial authenticator that operates over a field very
// similar to the one that GHASH uses. See
// https://tools.ietf.org/html/draft-irtf-cfrg-gcmsiv-02#section-3.
typedef union {
uint64_t u[2];
uint8_t c[16];
} polyval_block;
struct polyval_ctx {
// Note that the order of |S|, |H| and |Htable| is fixed by the MOVBE-based,
// x86-64, GHASH assembly. Additionally, some assembly routines require
// |Htable| to be 16-byte aligned.
polyval_block S;
u128 H;
alignas(16) u128 Htable[16];
gmult_func gmult;
ghash_func ghash;
};
// CRYPTO_POLYVAL_init initialises |ctx| using |key|.
void CRYPTO_POLYVAL_init(struct polyval_ctx *ctx, const uint8_t key[16]);
// CRYPTO_POLYVAL_update_blocks updates the accumulator in |ctx| given the
// blocks from |in|. Only a whole number of blocks can be processed so |in_len|
// must be a multiple of 16.
void CRYPTO_POLYVAL_update_blocks(struct polyval_ctx *ctx, const uint8_t *in,
size_t in_len);
// CRYPTO_POLYVAL_finish writes the accumulator from |ctx| to |out|.
void CRYPTO_POLYVAL_finish(const struct polyval_ctx *ctx, uint8_t out[16]);
#if defined(__cplusplus)
} // extern C
#endif
#endif // OPENSSL_HEADER_MODES_INTERNAL_H
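A minimal usage sketch for the CTR interface declared above (illustrative only, not part of the vendored sources; the wrapper name is hypothetical). It follows the documented contract: |ecount_buf| and |*num| are zeroed before the first call, |ivec| holds the big-endian counter and is updated in place, and the vendored AES_encrypt serves as the block128_f.

#include <CBigNumBoringSSL_aes.h>
#include "internal.h"
static void example_ctr128_crypt(const AES_KEY *aes, uint8_t ivec[16],
                                 const uint8_t *in, uint8_t *out, size_t len) {
  uint8_t ecount_buf[16] = {0};
  unsigned num = 0;
  // CTR encryption and decryption are the same operation.
  CRYPTO_ctr128_encrypt(in, out, len, aes, ivec, ecount_buf, &num, AES_encrypt);
}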

View File

@@ -0,0 +1,96 @@
/* ====================================================================
* Copyright (c) 2008 The OpenSSL Project. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
*
* 3. All advertising materials mentioning features or use of this
* software must display the following acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
*
* 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
* endorse or promote products derived from this software without
* prior written permission. For written permission, please contact
* openssl-core@openssl.org.
*
* 5. Products derived from this software may not be called "OpenSSL"
* nor may "OpenSSL" appear in their names without prior written
* permission of the OpenSSL Project.
*
* 6. Redistributions of any form whatsoever must retain the following
* acknowledgment:
* "This product includes software developed by the OpenSSL Project
* for use in the OpenSSL Toolkit (http://www.openssl.org/)"
*
* THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
* EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
* ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
* ==================================================================== */
#include <CBigNumBoringSSL_type_check.h>
#include <assert.h>
#include <string.h>
#include "internal.h"
OPENSSL_STATIC_ASSERT(16 % sizeof(size_t) == 0,
"block cannot be divided into size_t");
void CRYPTO_ofb128_encrypt(const uint8_t *in, uint8_t *out, size_t len,
const AES_KEY *key, uint8_t ivec[16], unsigned *num,
block128_f block) {
assert(in && out && key && ivec && num);
unsigned n = *num;
while (n && len) {
*(out++) = *(in++) ^ ivec[n];
--len;
n = (n + 1) % 16;
}
while (len >= 16) {
(*block)(ivec, ivec, key);
for (; n < 16; n += sizeof(size_t)) {
size_t a, b;
OPENSSL_memcpy(&a, in + n, sizeof(size_t));
OPENSSL_memcpy(&b, ivec + n, sizeof(size_t));
const size_t c = a ^ b;
OPENSSL_memcpy(out + n, &c, sizeof(size_t));
}
len -= 16;
out += 16;
in += 16;
n = 0;
}
if (len) {
(*block)(ivec, ivec, key);
while (len--) {
out[n] = in[n] ^ ivec[n];
++n;
}
}
*num = n;
}
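A matching usage sketch for CRYPTO_ofb128_encrypt (illustrative only, not part of the vendored sources; the wrapper name is hypothetical). As the header comment notes, OFB encryption and decryption are the same operation; |num| must be zero before the initial call, and |ivec| together with |*num| carries the keystream position between calls. It relies on the headers already included by ofb.c.

static void example_ofb128_crypt(const AES_KEY *aes, uint8_t ivec[16],
                                 const uint8_t *in, uint8_t *out, size_t len) {
  unsigned num = 0;
  CRYPTO_ofb128_encrypt(in, out, len, aes, ivec, &num, AES_encrypt);
}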

Some files were not shown because too many files have changed in this diff.