forked from OSchip/llvm-project
[Support/BLAKE3] LLVM-specific changes over the original BLAKE3 C implementation
Changes from original BLAKE3 sources: * `blake3.h`: * Changes to avoid conflicts if a client also links with its own BLAKE3 version: * Renamed the header macro guard with `LLVM_C_` prefix * Renamed the C symbols to add the `llvm_` prefix * Added a top header comment that references the CC0 license and points to the `LICENSE` file in the repo. * `blake3_impl.h`: Added `#define`s to remove some of the `llvm_` prefixes for the rest of the internal implementation. * Implementation files: * Added a top header comment for `blake3.c` * Used `llvm_` prefix for the C public API functions * Used `LLVM_LIBRARY_VISIBILITY` for internal implementation functions * Added `.private_extern`/`.hidden` in assembly files to reduce visibility of the internal implementation functions * `README.md`: * Added a note about where the sources originated from * Used the C++ BLAKE3 class and `llvm_` prefixed C API in place of examples and API documentation. * Removed instructions about how to build the files.
This commit is contained in:
parent
9aa701984d
commit
7f05aa2d4c
|
@ -1,5 +1,21 @@
|
|||
#ifndef BLAKE3_H
|
||||
#define BLAKE3_H
|
||||
/*===-- llvm-c/blake3.h - BLAKE3 C Interface ----------------------*- C -*-===*\
|
||||
|* *|
|
||||
|* Released into the public domain with CC0 1.0 *|
|
||||
|* See 'llvm/lib/Support/BLAKE3/LICENSE' for info. *|
|
||||
|* SPDX-License-Identifier: CC0-1.0 *|
|
||||
|* *|
|
||||
|*===----------------------------------------------------------------------===*|
|
||||
|* *|
|
||||
|* This header declares the C interface to LLVM's BLAKE3 implementation. *|
|
||||
|* Original BLAKE3 C API: https://github.com/BLAKE3-team/BLAKE3/tree/1.3.1/c *|
|
||||
|* *|
|
||||
|* Symbols are prefixed with 'llvm' to avoid a potential conflict with *|
|
||||
|* another BLAKE3 version within the same program. *|
|
||||
|* *|
|
||||
\*===----------------------------------------------------------------------===*/
|
||||
|
||||
#ifndef LLVM_C_BLAKE3_H
|
||||
#define LLVM_C_BLAKE3_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
@ -8,53 +24,56 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define BLAKE3_VERSION_STRING "1.3.1"
|
||||
#define BLAKE3_KEY_LEN 32
|
||||
#define BLAKE3_OUT_LEN 32
|
||||
#define BLAKE3_BLOCK_LEN 64
|
||||
#define BLAKE3_CHUNK_LEN 1024
|
||||
#define BLAKE3_MAX_DEPTH 54
|
||||
#define LLVM_BLAKE3_VERSION_STRING "1.3.1"
|
||||
#define LLVM_BLAKE3_KEY_LEN 32
|
||||
#define LLVM_BLAKE3_OUT_LEN 32
|
||||
#define LLVM_BLAKE3_BLOCK_LEN 64
|
||||
#define LLVM_BLAKE3_CHUNK_LEN 1024
|
||||
#define LLVM_BLAKE3_MAX_DEPTH 54
|
||||
|
||||
// This struct is a private implementation detail. It has to be here because
|
||||
// it's part of blake3_hasher below.
|
||||
// it's part of llvm_blake3_hasher below.
|
||||
typedef struct {
|
||||
uint32_t cv[8];
|
||||
uint64_t chunk_counter;
|
||||
uint8_t buf[BLAKE3_BLOCK_LEN];
|
||||
uint8_t buf[LLVM_BLAKE3_BLOCK_LEN];
|
||||
uint8_t buf_len;
|
||||
uint8_t blocks_compressed;
|
||||
uint8_t flags;
|
||||
} blake3_chunk_state;
|
||||
} llvm_blake3_chunk_state;
|
||||
|
||||
typedef struct {
|
||||
uint32_t key[8];
|
||||
blake3_chunk_state chunk;
|
||||
llvm_blake3_chunk_state chunk;
|
||||
uint8_t cv_stack_len;
|
||||
// The stack size is MAX_DEPTH + 1 because we do lazy merging. For example,
|
||||
// with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk
|
||||
// requires a 4th entry, rather than merging everything down to 1, because we
|
||||
// don't know whether more input is coming. This is different from how the
|
||||
// reference implementation does things.
|
||||
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
|
||||
} blake3_hasher;
|
||||
uint8_t cv_stack[(LLVM_BLAKE3_MAX_DEPTH + 1) * LLVM_BLAKE3_OUT_LEN];
|
||||
} llvm_blake3_hasher;
|
||||
|
||||
const char *blake3_version(void);
|
||||
void blake3_hasher_init(blake3_hasher *self);
|
||||
void blake3_hasher_init_keyed(blake3_hasher *self,
|
||||
const uint8_t key[BLAKE3_KEY_LEN]);
|
||||
void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
|
||||
void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
|
||||
size_t context_len);
|
||||
void blake3_hasher_update(blake3_hasher *self, const void *input,
|
||||
size_t input_len);
|
||||
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
|
||||
size_t out_len);
|
||||
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
|
||||
uint8_t *out, size_t out_len);
|
||||
void blake3_hasher_reset(blake3_hasher *self);
|
||||
const char *llvm_blake3_version(void);
|
||||
void llvm_blake3_hasher_init(llvm_blake3_hasher *self);
|
||||
void llvm_blake3_hasher_init_keyed(llvm_blake3_hasher *self,
|
||||
const uint8_t key[LLVM_BLAKE3_KEY_LEN]);
|
||||
void llvm_blake3_hasher_init_derive_key(llvm_blake3_hasher *self,
|
||||
const char *context);
|
||||
void llvm_blake3_hasher_init_derive_key_raw(llvm_blake3_hasher *self,
|
||||
const void *context,
|
||||
size_t context_len);
|
||||
void llvm_blake3_hasher_update(llvm_blake3_hasher *self, const void *input,
|
||||
size_t input_len);
|
||||
void llvm_blake3_hasher_finalize(const llvm_blake3_hasher *self, uint8_t *out,
|
||||
size_t out_len);
|
||||
void llvm_blake3_hasher_finalize_seek(const llvm_blake3_hasher *self,
|
||||
uint64_t seek, uint8_t *out,
|
||||
size_t out_len);
|
||||
void llvm_blake3_hasher_reset(llvm_blake3_hasher *self);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* BLAKE3_H */
|
||||
#endif /* LLVM_C_BLAKE3_H */
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
|
||||
namespace llvm {
|
||||
|
||||
/// The constant \p BLAKE3_OUT_LEN provides the default output length,
|
||||
/// The constant \p LLVM_BLAKE3_OUT_LEN provides the default output length,
|
||||
/// 32 bytes, which is recommended for most callers.
|
||||
///
|
||||
/// Outputs shorter than the default length of 32 bytes (256 bits) provide
|
||||
|
@ -31,7 +31,7 @@ namespace llvm {
|
|||
/// Shorter BLAKE3 outputs are prefixes of longer ones. Explicitly
|
||||
/// requesting a short output is equivalent to truncating the default-length
|
||||
/// output.
|
||||
template <size_t NumBytes = BLAKE3_OUT_LEN>
|
||||
template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
|
||||
using BLAKE3Result = std::array<uint8_t, NumBytes>;
|
||||
|
||||
/// A class that wraps the BLAKE3 algorithm.
|
||||
|
@ -40,37 +40,38 @@ public:
|
|||
BLAKE3() { init(); }
|
||||
|
||||
/// Reinitialize the internal state
|
||||
void init() { blake3_hasher_init(&Hasher); }
|
||||
void init() { llvm_blake3_hasher_init(&Hasher); }
|
||||
|
||||
/// Digest more data.
|
||||
void update(ArrayRef<uint8_t> Data) {
|
||||
blake3_hasher_update(&Hasher, Data.data(), Data.size());
|
||||
llvm_blake3_hasher_update(&Hasher, Data.data(), Data.size());
|
||||
}
|
||||
|
||||
/// Digest more data.
|
||||
void update(StringRef Str) {
|
||||
blake3_hasher_update(&Hasher, Str.data(), Str.size());
|
||||
llvm_blake3_hasher_update(&Hasher, Str.data(), Str.size());
|
||||
}
|
||||
|
||||
/// Finalize the hasher and put the result in \p Result.
|
||||
/// This doesn't modify the hasher itself, and it's possible to finalize again
|
||||
/// after adding more input.
|
||||
template <size_t NumBytes = BLAKE3_OUT_LEN>
|
||||
template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
|
||||
void final(BLAKE3Result<NumBytes> &Result) {
|
||||
blake3_hasher_finalize(&Hasher, Result.data(), Result.size());
|
||||
llvm_blake3_hasher_finalize(&Hasher, Result.data(), Result.size());
|
||||
}
|
||||
|
||||
/// Finalize the hasher and return an output of any length, given in bytes.
|
||||
/// This doesn't modify the hasher itself, and it's possible to finalize again
|
||||
/// after adding more input.
|
||||
template <size_t NumBytes = BLAKE3_OUT_LEN> BLAKE3Result<NumBytes> final() {
|
||||
template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
|
||||
BLAKE3Result<NumBytes> final() {
|
||||
BLAKE3Result<NumBytes> Result;
|
||||
blake3_hasher_finalize(&Hasher, Result.data(), Result.size());
|
||||
llvm_blake3_hasher_finalize(&Hasher, Result.data(), Result.size());
|
||||
return Result;
|
||||
}
|
||||
|
||||
/// Returns a BLAKE3 hash for the given data.
|
||||
template <size_t NumBytes = BLAKE3_OUT_LEN>
|
||||
template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
|
||||
static BLAKE3Result<NumBytes> hash(ArrayRef<uint8_t> Data) {
|
||||
BLAKE3 Hasher;
|
||||
Hasher.update(Data);
|
||||
|
@ -78,7 +79,7 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
blake3_hasher Hasher;
|
||||
llvm_blake3_hasher Hasher;
|
||||
};
|
||||
|
||||
} // namespace llvm
|
||||
|
|
|
@ -1,12 +1,14 @@
|
|||
The official C implementation of BLAKE3.
|
||||
Implementation of BLAKE3, originating from https://github.com/BLAKE3-team/BLAKE3/tree/1.3.1/c
|
||||
|
||||
# Example
|
||||
|
||||
An example program that hashes bytes from standard input and prints the
|
||||
result:
|
||||
|
||||
```c
|
||||
#include "blake3.h"
|
||||
Using the C++ API:
|
||||
|
||||
```c++
|
||||
#include "llvm/Support/BLAKE3.h"
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
@ -15,15 +17,14 @@ result:
|
|||
|
||||
int main() {
|
||||
// Initialize the hasher.
|
||||
blake3_hasher hasher;
|
||||
blake3_hasher_init(&hasher);
|
||||
llvm::BLAKE3 hasher;
|
||||
|
||||
// Read input bytes from stdin.
|
||||
unsigned char buf[65536];
|
||||
char buf[65536];
|
||||
while (1) {
|
||||
ssize_t n = read(STDIN_FILENO, buf, sizeof(buf));
|
||||
if (n > 0) {
|
||||
blake3_hasher_update(&hasher, buf, n);
|
||||
hasher.update(llvm::StringRef(buf, n));
|
||||
} else if (n == 0) {
|
||||
break; // end of file
|
||||
} else {
|
||||
|
@ -32,12 +33,53 @@ int main() {
|
|||
}
|
||||
}
|
||||
|
||||
// Finalize the hash. BLAKE3_OUT_LEN is the default output length, 32 bytes.
|
||||
uint8_t output[BLAKE3_OUT_LEN];
|
||||
blake3_hasher_finalize(&hasher, output, BLAKE3_OUT_LEN);
|
||||
// Finalize the hash. Default output length is 32 bytes.
|
||||
auto output = hasher.final();
|
||||
|
||||
// Print the hash as hexadecimal.
|
||||
for (size_t i = 0; i < BLAKE3_OUT_LEN; i++) {
|
||||
for (uint8_t byte : output) {
|
||||
printf("%02x", byte);
|
||||
}
|
||||
printf("\n");
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
Using the C API:
|
||||
|
||||
```c
|
||||
#include "llvm-c/blake3.h"
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
|
||||
int main() {
|
||||
// Initialize the hasher.
|
||||
llvm_blake3_hasher hasher;
|
||||
llvm_blake3_hasher_init(&hasher);
|
||||
|
||||
// Read input bytes from stdin.
|
||||
unsigned char buf[65536];
|
||||
while (1) {
|
||||
ssize_t n = read(STDIN_FILENO, buf, sizeof(buf));
|
||||
if (n > 0) {
|
||||
llvm_blake3_hasher_update(&hasher, buf, n);
|
||||
} else if (n == 0) {
|
||||
break; // end of file
|
||||
} else {
|
||||
fprintf(stderr, "read failed: %s\n", strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Finalize the hash. LLVM_BLAKE3_OUT_LEN is the default output length, 32 bytes.
|
||||
uint8_t output[LLVM_BLAKE3_OUT_LEN];
|
||||
llvm_blake3_hasher_finalize(&hasher, output, LLVM_BLAKE3_OUT_LEN);
|
||||
|
||||
// Print the hash as hexadecimal.
|
||||
for (size_t i = 0; i < LLVM_BLAKE3_OUT_LEN; i++) {
|
||||
printf("%02x", output[i]);
|
||||
}
|
||||
printf("\n");
|
||||
|
@ -45,29 +87,26 @@ int main() {
|
|||
}
|
||||
```
|
||||
|
||||
The code above is included in this directory as `example.c`. If you're
|
||||
on x86\_64 with a Unix-like OS, you can compile a working binary like
|
||||
this:
|
||||
|
||||
```bash
|
||||
gcc -O3 -o example example.c blake3.c blake3_dispatch.c blake3_portable.c \
|
||||
blake3_sse2_x86-64_unix.S blake3_sse41_x86-64_unix.S blake3_avx2_x86-64_unix.S \
|
||||
blake3_avx512_x86-64_unix.S
|
||||
```
|
||||
|
||||
# API
|
||||
|
||||
## The Struct
|
||||
## The Class/Struct
|
||||
|
||||
```c++
|
||||
class BLAKE3 {
|
||||
// API
|
||||
private:
|
||||
llvm_blake3_hasher Hasher;
|
||||
};
|
||||
```
|
||||
```c
|
||||
typedef struct {
|
||||
// private fields
|
||||
} blake3_hasher;
|
||||
} llvm_blake3_hasher;
|
||||
```
|
||||
|
||||
An incremental BLAKE3 hashing state, which can accept any number of
|
||||
updates. This implementation doesn't allocate any heap memory, but
|
||||
`sizeof(blake3_hasher)` itself is relatively large, currently 1912 bytes
|
||||
`sizeof(llvm_blake3_hasher)` itself is relatively large, currently 1912 bytes
|
||||
on x86-64. This size can be reduced by restricting the maximum input
|
||||
length, as described in Section 5.4 of [the BLAKE3
|
||||
spec](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf),
|
||||
|
@ -75,18 +114,28 @@ but this implementation doesn't currently support that strategy.
|
|||
|
||||
## Common API Functions
|
||||
|
||||
```c++
|
||||
BLAKE3::BLAKE3();
|
||||
|
||||
void BLAKE3::init();
|
||||
```
|
||||
```c
|
||||
void blake3_hasher_init(
|
||||
blake3_hasher *self);
|
||||
void llvm_blake3_hasher_init(
|
||||
llvm_blake3_hasher *self);
|
||||
```
|
||||
|
||||
Initialize a `blake3_hasher` in the default hashing mode.
|
||||
Initialize an `llvm_blake3_hasher` in the default hashing mode.
|
||||
|
||||
---
|
||||
|
||||
```c++
|
||||
void BLAKE3::update(ArrayRef<uint8_t> Data);
|
||||
|
||||
void BLAKE3::update(StringRef Str);
|
||||
```
|
||||
```c
|
||||
void blake3_hasher_update(
|
||||
blake3_hasher *self,
|
||||
void llvm_blake3_hasher_update(
|
||||
llvm_blake3_hasher *self,
|
||||
const void *input,
|
||||
size_t input_len);
|
||||
```
|
||||
|
@ -95,16 +144,26 @@ Add input to the hasher. This can be called any number of times.
|
|||
|
||||
---
|
||||
|
||||
```c++
|
||||
template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
|
||||
using BLAKE3Result = std::array<uint8_t, NumBytes>;
|
||||
|
||||
template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
|
||||
void BLAKE3::final(BLAKE3Result<NumBytes> &Result);
|
||||
|
||||
template <size_t NumBytes = LLVM_BLAKE3_OUT_LEN>
|
||||
BLAKE3Result<NumBytes> final();
|
||||
```
|
||||
```c
|
||||
void blake3_hasher_finalize(
|
||||
const blake3_hasher *self,
|
||||
void llvm_blake3_hasher_finalize(
|
||||
const llvm_blake3_hasher *self,
|
||||
uint8_t *out,
|
||||
size_t out_len);
|
||||
```
|
||||
|
||||
Finalize the hasher and return an output of any length, given in bytes.
|
||||
This doesn't modify the hasher itself, and it's possible to finalize
|
||||
again after adding more input. The constant `BLAKE3_OUT_LEN` provides
|
||||
again after adding more input. The constant `LLVM_BLAKE3_OUT_LEN` provides
|
||||
the default output length, 32 bytes, which is recommended for most
|
||||
callers.
|
||||
|
||||
|
@ -121,25 +180,25 @@ output. (Note that this is different between BLAKE2 and BLAKE3.)
|
|||
## Less Common API Functions
|
||||
|
||||
```c
|
||||
void blake3_hasher_init_keyed(
|
||||
blake3_hasher *self,
|
||||
const uint8_t key[BLAKE3_KEY_LEN]);
|
||||
void llvm_blake3_hasher_init_keyed(
|
||||
llvm_blake3_hasher *self,
|
||||
const uint8_t key[LLVM_BLAKE3_KEY_LEN]);
|
||||
```
|
||||
|
||||
Initialize a `blake3_hasher` in the keyed hashing mode. The key must be
|
||||
Initialize an `llvm_blake3_hasher` in the keyed hashing mode. The key must be
|
||||
exactly 32 bytes.
|
||||
|
||||
---
|
||||
|
||||
```c
|
||||
void blake3_hasher_init_derive_key(
|
||||
blake3_hasher *self,
|
||||
void llvm_blake3_hasher_init_derive_key(
|
||||
llvm_blake3_hasher *self,
|
||||
const char *context);
|
||||
```
|
||||
|
||||
Initialize a `blake3_hasher` in the key derivation mode. The context
|
||||
Initialize an `llvm_blake3_hasher` in the key derivation mode. The context
|
||||
string is given as an initialization parameter, and afterwards input key
|
||||
material should be given with `blake3_hasher_update`. The context string
|
||||
material should be given with `llvm_blake3_hasher_update`. The context string
|
||||
is a null-terminated C string which should be **hardcoded, globally
|
||||
unique, and application-specific**. The context string should not
|
||||
include any dynamic input like salts, nonces, or identifiers read from a
|
||||
|
@ -148,24 +207,24 @@ database at runtime. A good default format for the context string is
|
|||
2019-12-25 16:18:03 session tokens v1"`.
|
||||
|
||||
This function is intended for application code written in C. For
|
||||
language bindings, see `blake3_hasher_init_derive_key_raw` below.
|
||||
language bindings, see `llvm_blake3_hasher_init_derive_key_raw` below.
|
||||
|
||||
---
|
||||
|
||||
```c
|
||||
void blake3_hasher_init_derive_key_raw(
|
||||
blake3_hasher *self,
|
||||
void llvm_blake3_hasher_init_derive_key_raw(
|
||||
llvm_blake3_hasher *self,
|
||||
const void *context,
|
||||
size_t context_len);
|
||||
```
|
||||
|
||||
As `blake3_hasher_init_derive_key` above, except that the context string
|
||||
As `llvm_blake3_hasher_init_derive_key` above, except that the context string
|
||||
is given as a pointer to an array of arbitrary bytes with a provided
|
||||
length. This is intended for writing language bindings, where C string
|
||||
conversion would add unnecessary overhead and new error cases. Unicode
|
||||
strings should be encoded as UTF-8.
|
||||
|
||||
Application code in C should prefer `blake3_hasher_init_derive_key`,
|
||||
Application code in C should prefer `llvm_blake3_hasher_init_derive_key`,
|
||||
which takes the context as a C string. If you need to use arbitrary
|
||||
bytes as a context string in application code, consider whether you're
|
||||
violating the requirement that context strings should be hardcoded.
|
||||
|
@ -173,14 +232,14 @@ violating the requirement that context strings should be hardcoded.
|
|||
---
|
||||
|
||||
```c
|
||||
void blake3_hasher_finalize_seek(
|
||||
const blake3_hasher *self,
|
||||
void llvm_blake3_hasher_finalize_seek(
|
||||
const llvm_blake3_hasher *self,
|
||||
uint64_t seek,
|
||||
uint8_t *out,
|
||||
size_t out_len);
|
||||
```
|
||||
|
||||
The same as `blake3_hasher_finalize`, but with an additional `seek`
|
||||
The same as `llvm_blake3_hasher_finalize`, but with an additional `seek`
|
||||
parameter for the starting byte position in the output stream. To
|
||||
efficiently stream a large output without allocating memory, call this
|
||||
function in a loop, incrementing `seek` by the output length each time.
|
||||
|
@ -188,26 +247,20 @@ function in a loop, incrementing `seek` by the output length each time.
|
|||
---
|
||||
|
||||
```c
|
||||
void blake3_hasher_reset(
|
||||
blake3_hasher *self);
|
||||
void llvm_blake3_hasher_reset(
|
||||
llvm_blake3_hasher *self);
|
||||
```
|
||||
|
||||
Reset the hasher to its initial state, prior to any calls to
|
||||
`blake3_hasher_update`. Currently this is no different from calling
|
||||
`blake3_hasher_init` or similar again. However, if this implementation gains
|
||||
multithreading support in the future, and if `blake3_hasher` holds (optional)
|
||||
threading resources, this function will reuse those resources. Until then, this
|
||||
is mainly for feature compatibility with the Rust implementation.
|
||||
`llvm_blake3_hasher_update`. Currently this is no different from calling
|
||||
`llvm_blake3_hasher_init` or similar again. However, if this implementation gains
|
||||
multithreading support in the future, and if `llvm_blake3_hasher` holds (optional)
|
||||
threading resources, this function will reuse those resources.
|
||||
|
||||
|
||||
# Building
|
||||
|
||||
This implementation is just C and assembly files. It doesn't include a
|
||||
public-facing build system. (The `Makefile` in this directory is only
|
||||
for testing.) Instead, the intention is that you can include these files
|
||||
in whatever build system you're already using. This section describes
|
||||
the commands your build system should execute, or which you can execute
|
||||
by hand. Note that these steps may change in future versions.
|
||||
This implementation is just C and assembly files.
|
||||
|
||||
## x86
|
||||
|
||||
|
@ -225,92 +278,19 @@ different compilers, and they build more quickly. On the other hand, the
|
|||
assembly versions are x86\_64-only, and you need to select the right
|
||||
flavor for your target platform.
|
||||
|
||||
Here's an example of building a shared library on x86\_64 Linux using
|
||||
the assembly implementations:
|
||||
|
||||
```bash
|
||||
gcc -shared -O3 -o libblake3.so blake3.c blake3_dispatch.c blake3_portable.c \
|
||||
blake3_sse2_x86-64_unix.S blake3_sse41_x86-64_unix.S blake3_avx2_x86-64_unix.S \
|
||||
blake3_avx512_x86-64_unix.S
|
||||
```
|
||||
|
||||
When building the intrinsics-based implementations, you need to build
|
||||
each implementation separately, with the corresponding instruction set
|
||||
explicitly enabled in the compiler. Here's the same shared library using
|
||||
the intrinsics-based implementations:
|
||||
|
||||
```bash
|
||||
gcc -c -fPIC -O3 -msse2 blake3_sse2.c -o blake3_sse2.o
|
||||
gcc -c -fPIC -O3 -msse4.1 blake3_sse41.c -o blake3_sse41.o
|
||||
gcc -c -fPIC -O3 -mavx2 blake3_avx2.c -o blake3_avx2.o
|
||||
gcc -c -fPIC -O3 -mavx512f -mavx512vl blake3_avx512.c -o blake3_avx512.o
|
||||
gcc -shared -O3 -o libblake3.so blake3.c blake3_dispatch.c blake3_portable.c \
|
||||
blake3_avx2.o blake3_avx512.o blake3_sse41.o blake3_sse2.o
|
||||
```
|
||||
|
||||
Note above that building `blake3_avx512.c` requires both `-mavx512f` and
|
||||
`-mavx512vl` under GCC and Clang. Under MSVC, the single `/arch:AVX512`
|
||||
flag is sufficient. The MSVC equivalent of `-mavx2` is `/arch:AVX2`.
|
||||
MSVC enables SSE2 and SSE4.1 by default, and it doesn't have a
|
||||
corresponding flag.
|
||||
|
||||
If you want to omit SIMD code entirely, you need to explicitly disable
|
||||
each instruction set. Here's an example of building a shared library on
|
||||
x86 with only portable code:
|
||||
|
||||
```bash
|
||||
gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_AVX2 \
|
||||
-DBLAKE3_NO_AVX512 blake3.c blake3_dispatch.c blake3_portable.c
|
||||
```
|
||||
|
||||
## ARM NEON
|
||||
|
||||
The NEON implementation is enabled by default on AArch64, but not on
|
||||
other ARM targets, since not all of them support it. To enable it, set
|
||||
`BLAKE3_USE_NEON=1`. Here's an example of building a shared library on
|
||||
ARM Linux with NEON support:
|
||||
|
||||
```bash
|
||||
gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=1 blake3.c blake3_dispatch.c \
|
||||
blake3_portable.c blake3_neon.c
|
||||
```
|
||||
`BLAKE3_USE_NEON=1`.
|
||||
|
||||
To explicitly disable using NEON instructions on AArch64, set
|
||||
`BLAKE3_USE_NEON=0`.
|
||||
|
||||
```bash
|
||||
gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=0 blake3.c blake3_dispatch.c \
|
||||
blake3_portable.c
|
||||
```
|
||||
|
||||
Note that on some targets (ARMv7 in particular), extra flags may be
|
||||
required to activate NEON support in the compiler. If you see an error
|
||||
like...
|
||||
|
||||
```
|
||||
/usr/lib/gcc/armv7l-unknown-linux-gnueabihf/9.2.0/include/arm_neon.h:635:1: error: inlining failed
|
||||
in call to always_inline ‘vaddq_u32’: target specific option mismatch
|
||||
```
|
||||
|
||||
...then you may need to add something like `-mfpu=neon-vfpv4
|
||||
-mfloat-abi=hard`.
|
||||
|
||||
## Other Platforms
|
||||
|
||||
The portable implementation should work on most other architectures. For
|
||||
example:
|
||||
|
||||
```bash
|
||||
gcc -shared -O3 -o libblake3.so blake3.c blake3_dispatch.c blake3_portable.c
|
||||
```
|
||||
The portable implementation should work on most other architectures.
|
||||
|
||||
# Multithreading
|
||||
|
||||
Unlike the Rust implementation, the C implementation doesn't currently support
|
||||
multithreading. A future version of this library could add support by taking an
|
||||
optional dependency on OpenMP or similar. Alternatively, we could expose a
|
||||
lower-level API to allow callers to implement concurrency themselves. The
|
||||
former would be more convenient and less error-prone, but the latter would give
|
||||
callers the maximum possible amount of control. The best choice here depends on
|
||||
the specific use case, so if you have a use case for multithreaded hashing in
|
||||
C, please file a GitHub issue and let us know.
|
||||
The implementation doesn't currently support multithreading.
|
||||
|
|
|
@ -1,11 +1,18 @@
|
|||
/*===-- blake3.c - BLAKE3 C Implementation ------------------------*- C -*-===*\
|
||||
|* *|
|
||||
|* Released into the public domain with CC0 1.0 *|
|
||||
|* See 'llvm/lib/Support/BLAKE3/LICENSE' for info. *|
|
||||
|* SPDX-License-Identifier: CC0-1.0 *|
|
||||
|* *|
|
||||
\*===----------------------------------------------------------------------===*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "blake3.h"
|
||||
#include "blake3_impl.h"
|
||||
|
||||
const char *blake3_version(void) { return BLAKE3_VERSION_STRING; }
|
||||
const char *llvm_blake3_version(void) { return BLAKE3_VERSION_STRING; }
|
||||
|
||||
INLINE void chunk_state_init(blake3_chunk_state *self, const uint32_t key[8],
|
||||
uint8_t flags) {
|
||||
|
@ -366,29 +373,29 @@ INLINE void hasher_init_base(blake3_hasher *self, const uint32_t key[8],
|
|||
self->cv_stack_len = 0;
|
||||
}
|
||||
|
||||
void blake3_hasher_init(blake3_hasher *self) { hasher_init_base(self, IV, 0); }
|
||||
void llvm_blake3_hasher_init(blake3_hasher *self) { hasher_init_base(self, IV, 0); }
|
||||
|
||||
void blake3_hasher_init_keyed(blake3_hasher *self,
|
||||
void llvm_blake3_hasher_init_keyed(blake3_hasher *self,
|
||||
const uint8_t key[BLAKE3_KEY_LEN]) {
|
||||
uint32_t key_words[8];
|
||||
load_key_words(key, key_words);
|
||||
hasher_init_base(self, key_words, KEYED_HASH);
|
||||
}
|
||||
|
||||
void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
|
||||
void llvm_blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
|
||||
size_t context_len) {
|
||||
blake3_hasher context_hasher;
|
||||
hasher_init_base(&context_hasher, IV, DERIVE_KEY_CONTEXT);
|
||||
blake3_hasher_update(&context_hasher, context, context_len);
|
||||
llvm_blake3_hasher_update(&context_hasher, context, context_len);
|
||||
uint8_t context_key[BLAKE3_KEY_LEN];
|
||||
blake3_hasher_finalize(&context_hasher, context_key, BLAKE3_KEY_LEN);
|
||||
llvm_blake3_hasher_finalize(&context_hasher, context_key, BLAKE3_KEY_LEN);
|
||||
uint32_t context_key_words[8];
|
||||
load_key_words(context_key, context_key_words);
|
||||
hasher_init_base(self, context_key_words, DERIVE_KEY_MATERIAL);
|
||||
}
|
||||
|
||||
void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context) {
|
||||
blake3_hasher_init_derive_key_raw(self, context, strlen(context));
|
||||
void llvm_blake3_hasher_init_derive_key(blake3_hasher *self, const char *context) {
|
||||
llvm_blake3_hasher_init_derive_key_raw(self, context, strlen(context));
|
||||
}
|
||||
|
||||
// As described in hasher_push_cv() below, we do "lazy merging", delaying
|
||||
|
@ -452,7 +459,7 @@ INLINE void hasher_push_cv(blake3_hasher *self, uint8_t new_cv[BLAKE3_OUT_LEN],
|
|||
self->cv_stack_len += 1;
|
||||
}
|
||||
|
||||
void blake3_hasher_update(blake3_hasher *self, const void *input,
|
||||
void llvm_blake3_hasher_update(blake3_hasher *self, const void *input,
|
||||
size_t input_len) {
|
||||
// Explicitly checking for zero avoids causing UB by passing a null pointer
|
||||
// to memcpy. This comes up in practice with things like:
|
||||
|
@ -561,12 +568,12 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
|
|||
}
|
||||
}
|
||||
|
||||
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
|
||||
void llvm_blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
|
||||
size_t out_len) {
|
||||
blake3_hasher_finalize_seek(self, 0, out, out_len);
|
||||
llvm_blake3_hasher_finalize_seek(self, 0, out, out_len);
|
||||
}
|
||||
|
||||
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
|
||||
void llvm_blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
|
||||
uint8_t *out, size_t out_len) {
|
||||
// Explicitly checking for zero avoids causing UB by passing a null pointer
|
||||
// to memcpy. This comes up in practice with things like:
|
||||
|
@ -610,7 +617,7 @@ void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
|
|||
output_root_bytes(&output, seek, out, out_len);
|
||||
}
|
||||
|
||||
void blake3_hasher_reset(blake3_hasher *self) {
|
||||
void llvm_blake3_hasher_reset(blake3_hasher *self) {
|
||||
chunk_state_reset(&self->chunk, self->key, 0);
|
||||
self->cv_stack_len = 0;
|
||||
}
|
||||
|
|
|
@ -1,60 +0,0 @@
|
|||
#ifndef BLAKE3_H
|
||||
#define BLAKE3_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define BLAKE3_VERSION_STRING "1.3.1"
|
||||
#define BLAKE3_KEY_LEN 32
|
||||
#define BLAKE3_OUT_LEN 32
|
||||
#define BLAKE3_BLOCK_LEN 64
|
||||
#define BLAKE3_CHUNK_LEN 1024
|
||||
#define BLAKE3_MAX_DEPTH 54
|
||||
|
||||
// This struct is a private implementation detail. It has to be here because
|
||||
// it's part of blake3_hasher below.
|
||||
typedef struct {
|
||||
uint32_t cv[8];
|
||||
uint64_t chunk_counter;
|
||||
uint8_t buf[BLAKE3_BLOCK_LEN];
|
||||
uint8_t buf_len;
|
||||
uint8_t blocks_compressed;
|
||||
uint8_t flags;
|
||||
} blake3_chunk_state;
|
||||
|
||||
typedef struct {
|
||||
uint32_t key[8];
|
||||
blake3_chunk_state chunk;
|
||||
uint8_t cv_stack_len;
|
||||
// The stack size is MAX_DEPTH + 1 because we do lazy merging. For example,
|
||||
// with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk
|
||||
// requires a 4th entry, rather than merging everything down to 1, because we
|
||||
// don't know whether more input is coming. This is different from how the
|
||||
// reference implementation does things.
|
||||
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
|
||||
} blake3_hasher;
|
||||
|
||||
const char *blake3_version(void);
|
||||
void blake3_hasher_init(blake3_hasher *self);
|
||||
void blake3_hasher_init_keyed(blake3_hasher *self,
|
||||
const uint8_t key[BLAKE3_KEY_LEN]);
|
||||
void blake3_hasher_init_derive_key(blake3_hasher *self, const char *context);
|
||||
void blake3_hasher_init_derive_key_raw(blake3_hasher *self, const void *context,
|
||||
size_t context_len);
|
||||
void blake3_hasher_update(blake3_hasher *self, const void *input,
|
||||
size_t input_len);
|
||||
void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
|
||||
size_t out_len);
|
||||
void blake3_hasher_finalize_seek(const blake3_hasher *self, uint64_t seek,
|
||||
uint8_t *out, size_t out_len);
|
||||
void blake3_hasher_reset(blake3_hasher *self);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* BLAKE3_H */
|
|
@ -12,7 +12,15 @@
|
|||
#define _CET_ENDBR
|
||||
#endif
|
||||
|
||||
#ifdef __APPLE__
|
||||
#define HIDDEN .private_extern
|
||||
#else
|
||||
#define HIDDEN .hidden
|
||||
#endif
|
||||
|
||||
.intel_syntax noprefix
|
||||
HIDDEN _blake3_hash_many_avx2
|
||||
HIDDEN blake3_hash_many_avx2
|
||||
.global _blake3_hash_many_avx2
|
||||
.global blake3_hash_many_avx2
|
||||
#ifdef __APPLE__
|
||||
|
|
|
@ -1,4 +1,6 @@
|
|||
.intel_syntax noprefix
|
||||
.hidden _blake3_hash_many_avx2
|
||||
.hidden blake3_hash_many_avx2
|
||||
.global _blake3_hash_many_avx2
|
||||
.global blake3_hash_many_avx2
|
||||
.section .text
|
||||
|
|
|
@ -12,7 +12,19 @@
|
|||
#define _CET_ENDBR
|
||||
#endif
|
||||
|
||||
/* HIDDEN is the assembler directive applied to the *_avx512 symbols below to
 * reduce their visibility: `.private_extern` when __APPLE__ is defined,
 * `.hidden` otherwise. */
#ifdef __APPLE__
|
||||
#define HIDDEN .private_extern
|
||||
#else
|
||||
#define HIDDEN .hidden
|
||||
#endif
|
||||
|
||||
.intel_syntax noprefix
|
||||
HIDDEN _blake3_hash_many_avx512
|
||||
HIDDEN blake3_hash_many_avx512
|
||||
HIDDEN blake3_compress_in_place_avx512
|
||||
HIDDEN _blake3_compress_in_place_avx512
|
||||
HIDDEN blake3_compress_xof_avx512
|
||||
HIDDEN _blake3_compress_xof_avx512
|
||||
.global _blake3_hash_many_avx512
|
||||
.global blake3_hash_many_avx512
|
||||
.global blake3_compress_in_place_avx512
|
||||
|
|
|
@ -1,5 +1,11 @@
|
|||
.intel_syntax noprefix
|
||||
|
||||
.hidden _blake3_hash_many_avx512
|
||||
.hidden blake3_hash_many_avx512
|
||||
.hidden blake3_compress_in_place_avx512
|
||||
.hidden _blake3_compress_in_place_avx512
|
||||
.hidden blake3_compress_xof_avx512
|
||||
.hidden _blake3_compress_xof_avx512
|
||||
.global _blake3_hash_many_avx512
|
||||
.global blake3_hash_many_avx512
|
||||
.global blake3_compress_in_place_avx512
|
||||
|
|
|
@ -78,6 +78,7 @@ static /* Allow the variable to be controlled manually for testing */
|
|||
#endif
|
||||
enum cpu_feature g_cpu_features = UNDEFINED;
|
||||
|
||||
LLVM_ATTRIBUTE_USED
|
||||
#if !defined(BLAKE3_TESTING)
|
||||
static
|
||||
#endif
|
||||
|
|
|
@ -7,7 +7,19 @@
|
|||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "blake3.h"
|
||||
#include "llvm-c/blake3.h"
|
||||
// For \p LLVM_LIBRARY_VISIBILITY
|
||||
#include "llvm/Support/Compiler.h"
|
||||
|
||||
// Remove the 'llvm_' prefix for the rest of the internal implementation.
// Each macro maps an unprefixed BLAKE3 name onto its LLVM_/llvm_-prefixed
// counterpart, so code below this point can keep using the unprefixed
// spelling while the names it resolves to carry the prefix.
|
||||
#define BLAKE3_VERSION_STRING LLVM_BLAKE3_VERSION_STRING
|
||||
#define BLAKE3_KEY_LEN LLVM_BLAKE3_KEY_LEN
|
||||
#define BLAKE3_OUT_LEN LLVM_BLAKE3_OUT_LEN
|
||||
#define BLAKE3_BLOCK_LEN LLVM_BLAKE3_BLOCK_LEN
|
||||
#define BLAKE3_CHUNK_LEN LLVM_BLAKE3_CHUNK_LEN
|
||||
#define BLAKE3_MAX_DEPTH LLVM_BLAKE3_MAX_DEPTH
|
||||
// The two type names are remapped the same way as the constants above.
#define blake3_hasher llvm_blake3_hasher
|
||||
#define blake3_chunk_state llvm_blake3_chunk_state
|
||||
|
||||
// internal flags
|
||||
enum blake3_flags {
|
||||
|
@ -178,35 +190,42 @@ INLINE void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) {
|
|||
store32(&bytes_out[7 * 4], cv_words[7]);
|
||||
}
|
||||
|
||||
// Internal implementation function (hence LLVM_LIBRARY_VISIBILITY), not part
// of the public API. Takes a mutable 8-word `cv`, one block buffer of
// BLAKE3_BLOCK_LEN bytes with its length, a 64-bit counter and a flags byte.
// NOTE(review): the name and the non-const `cv` suggest the result is written
// back into `cv` -- confirm against the definition.
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_compress_in_place(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter,
|
||||
uint8_t flags);
|
||||
|
||||
// Internal implementation function (hence LLVM_LIBRARY_VISIBILITY). Unlike
// blake3_compress_in_place, `cv` is const here and the prototype carries a
// separate 64-byte `out` buffer.
// NOTE(review): presumably the compression output is written to `out` --
// confirm against the definition.
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_compress_xof(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter, uint8_t flags,
|
||||
uint8_t out[64]);
|
||||
|
||||
// Internal implementation function (hence LLVM_LIBRARY_VISIBILITY). Takes a
// list of input pointers with its count (`inputs`, `num_inputs`), a block
// count, an 8-word key, a 64-bit counter, three flag bytes (`flags`,
// `flags_start`, `flags_end`) and an output pointer.
// NOTE(review): presumably hashes `blocks` blocks from each input and writes
// one result per input to `out`, with `increment_counter` selecting whether
// the counter advances between inputs -- confirm against the definition.
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
||||
size_t blocks, const uint32_t key[8], uint64_t counter,
|
||||
bool increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
||||
|
||||
// Internal implementation function (hence LLVM_LIBRARY_VISIBILITY); takes no
// arguments and returns a size_t.
// NOTE(review): presumably the number of inputs the active SIMD backend
// handles in parallel -- confirm against the definition.
LLVM_LIBRARY_VISIBILITY
|
||||
size_t blake3_simd_degree(void);
|
||||
|
||||
|
||||
// Declarations for implementation-specific functions.
|
||||
// Portable implementation-specific variant; its parameter list matches
// blake3_compress_in_place exactly. Marked LLVM_LIBRARY_VISIBILITY as an
// internal implementation function.
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_compress_in_place_portable(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter,
|
||||
uint8_t flags);
|
||||
|
||||
// Portable implementation-specific variant; its parameter list matches
// blake3_compress_xof exactly (const `cv`, separate 64-byte `out`). Marked
// LLVM_LIBRARY_VISIBILITY as an internal implementation function.
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_compress_xof_portable(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter,
|
||||
uint8_t flags, uint8_t out[64]);
|
||||
|
||||
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
|
||||
size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, bool increment_counter,
|
||||
|
@ -215,14 +234,17 @@ void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
|
|||
|
||||
#if defined(IS_X86)
|
||||
#if !defined(BLAKE3_NO_SSE2)
|
||||
// SSE2-specific variant with the same parameter list as
// blake3_compress_in_place. Declared only inside the IS_X86 section and only
// when BLAKE3_NO_SSE2 is not defined. Marked LLVM_LIBRARY_VISIBILITY as an
// internal implementation function.
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_compress_in_place_sse2(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter,
|
||||
uint8_t flags);
|
||||
// SSE2-specific variant with the same parameter list as blake3_compress_xof
// (const `cv`, separate 64-byte `out`). Declared only when BLAKE3_NO_SSE2 is
// not defined. Marked LLVM_LIBRARY_VISIBILITY as an internal implementation
// function.
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_compress_xof_sse2(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter,
|
||||
uint8_t flags, uint8_t out[64]);
|
||||
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
|
||||
size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, bool increment_counter,
|
||||
|
@ -230,14 +252,17 @@ void blake3_hash_many_sse2(const uint8_t *const *inputs, size_t num_inputs,
|
|||
uint8_t flags_end, uint8_t *out);
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_SSE41)
|
||||
// SSE4.1-specific variant with the same parameter list as
// blake3_compress_in_place. Declared only when BLAKE3_NO_SSE41 is not
// defined. Marked LLVM_LIBRARY_VISIBILITY as an internal implementation
// function.
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_compress_in_place_sse41(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter,
|
||||
uint8_t flags);
|
||||
// SSE4.1-specific variant with the same parameter list as
// blake3_compress_xof (const `cv`, separate 64-byte `out`). Declared only
// when BLAKE3_NO_SSE41 is not defined. Marked LLVM_LIBRARY_VISIBILITY as an
// internal implementation function.
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_compress_xof_sse41(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter,
|
||||
uint8_t flags, uint8_t out[64]);
|
||||
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
|
||||
size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, bool increment_counter,
|
||||
|
@ -245,6 +270,7 @@ void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
|
|||
uint8_t flags_end, uint8_t *out);
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_AVX2)
|
||||
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
|
||||
size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, bool increment_counter,
|
||||
|
@ -252,16 +278,19 @@ void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
|
|||
uint8_t flags_end, uint8_t *out);
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_AVX512)
|
||||
// AVX-512-specific variant with the same parameter list as
// blake3_compress_in_place. Declared only when BLAKE3_NO_AVX512 is not
// defined. Marked LLVM_LIBRARY_VISIBILITY as an internal implementation
// function.
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_compress_in_place_avx512(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter,
|
||||
uint8_t flags);
|
||||
|
||||
// AVX-512-specific variant with the same parameter list as
// blake3_compress_xof (const `cv`, separate 64-byte `out`). Declared only
// when BLAKE3_NO_AVX512 is not defined. Marked LLVM_LIBRARY_VISIBILITY as an
// internal implementation function.
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_compress_xof_avx512(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter,
|
||||
uint8_t flags, uint8_t out[64]);
|
||||
|
||||
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
|
||||
size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, bool increment_counter,
|
||||
|
@ -271,6 +300,7 @@ void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
|
|||
#endif
|
||||
|
||||
#if BLAKE3_USE_NEON == 1
|
||||
LLVM_LIBRARY_VISIBILITY
|
||||
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
|
||||
size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, bool increment_counter,
|
||||
|
|
|
@ -228,6 +228,7 @@ INLINE void load_counters4(uint64_t counter, bool increment_counter,
|
|||
counter_high(counter + (mask & 2)), counter_high(counter + (mask & 3)));
|
||||
}
|
||||
|
||||
static
|
||||
void blake3_hash4_neon(const uint8_t *const *inputs, size_t blocks,
|
||||
const uint32_t key[8], uint64_t counter,
|
||||
bool increment_counter, uint8_t flags,
|
||||
|
|
|
@ -12,7 +12,19 @@
|
|||
#define _CET_ENDBR
|
||||
#endif
|
||||
|
||||
/* HIDDEN is the assembler directive applied to the *_sse2 symbols below to
 * reduce their visibility: `.private_extern` when __APPLE__ is defined,
 * `.hidden` otherwise. */
#ifdef __APPLE__
|
||||
#define HIDDEN .private_extern
|
||||
#else
|
||||
#define HIDDEN .hidden
|
||||
#endif
|
||||
|
||||
.intel_syntax noprefix
|
||||
HIDDEN blake3_hash_many_sse2
|
||||
HIDDEN _blake3_hash_many_sse2
|
||||
HIDDEN blake3_compress_in_place_sse2
|
||||
HIDDEN _blake3_compress_in_place_sse2
|
||||
HIDDEN blake3_compress_xof_sse2
|
||||
HIDDEN _blake3_compress_xof_sse2
|
||||
.global blake3_hash_many_sse2
|
||||
.global _blake3_hash_many_sse2
|
||||
.global blake3_compress_in_place_sse2
|
||||
|
|
|
@ -1,4 +1,10 @@
|
|||
.intel_syntax noprefix
|
||||
.hidden blake3_hash_many_sse2
|
||||
.hidden _blake3_hash_many_sse2
|
||||
.hidden blake3_compress_in_place_sse2
|
||||
.hidden _blake3_compress_in_place_sse2
|
||||
.hidden blake3_compress_xof_sse2
|
||||
.hidden _blake3_compress_xof_sse2
|
||||
.global blake3_hash_many_sse2
|
||||
.global _blake3_hash_many_sse2
|
||||
.global blake3_compress_in_place_sse2
|
||||
|
|
|
@ -12,7 +12,19 @@
|
|||
#define _CET_ENDBR
|
||||
#endif
|
||||
|
||||
/* HIDDEN is the assembler directive applied to the *_sse41 symbols below to
 * reduce their visibility: `.private_extern` when __APPLE__ is defined,
 * `.hidden` otherwise. */
#ifdef __APPLE__
|
||||
#define HIDDEN .private_extern
|
||||
#else
|
||||
#define HIDDEN .hidden
|
||||
#endif
|
||||
|
||||
.intel_syntax noprefix
|
||||
HIDDEN blake3_hash_many_sse41
|
||||
HIDDEN _blake3_hash_many_sse41
|
||||
HIDDEN blake3_compress_in_place_sse41
|
||||
HIDDEN _blake3_compress_in_place_sse41
|
||||
HIDDEN blake3_compress_xof_sse41
|
||||
HIDDEN _blake3_compress_xof_sse41
|
||||
.global blake3_hash_many_sse41
|
||||
.global _blake3_hash_many_sse41
|
||||
.global blake3_compress_in_place_sse41
|
||||
|
|
|
@ -1,4 +1,10 @@
|
|||
.intel_syntax noprefix
|
||||
.hidden blake3_hash_many_sse41
|
||||
.hidden _blake3_hash_many_sse41
|
||||
.hidden blake3_compress_in_place_sse41
|
||||
.hidden _blake3_compress_in_place_sse41
|
||||
.hidden blake3_compress_xof_sse41
|
||||
.hidden _blake3_compress_xof_sse41
|
||||
.global blake3_hash_many_sse41
|
||||
.global _blake3_hash_many_sse41
|
||||
.global blake3_compress_in_place_sse41
|
||||
|
|
Loading…
Reference in New Issue