From d449c600767284486615f3b79601ced15a00af61 Mon Sep 17 00:00:00 2001 From: Cole Kissane Date: Wed, 13 Jul 2022 19:58:42 -0700 Subject: [PATCH] [llvm] add zstd to llvm::compression namespace - add `FindZSTD.cmake` - add zstd to `llvm::compression` namespace - add a CMake option `LLVM_ENABLE_ZSTD` with behavior mirroring that of `LLVM_ENABLE_ZLIB` - add tests for zstd to `llvm/unittests/Support/CompressionTest.cpp` Reviewed By: leonardchan, MaskRay Differential Revision: https://reviews.llvm.org/D128465 --- llvm/CMakeLists.txt | 2 + llvm/cmake/config-ix.cmake | 29 +++++++++ llvm/cmake/modules/FindZSTD.cmake | 21 +++++++ llvm/cmake/modules/LLVMConfig.cmake.in | 6 ++ llvm/include/llvm/Config/llvm-config.h.cmake | 3 + llvm/include/llvm/Support/Compression.h | 22 +++++++ llvm/lib/Support/CMakeLists.txt | 4 ++ llvm/lib/Support/Compression.cpp | 65 ++++++++++++++++++++ llvm/test/lit.site.cfg.py.in | 1 + llvm/unittests/Support/CompressionTest.cpp | 38 ++++++++++++ utils/bazel/llvm_configs/llvm-config.h.cmake | 3 + 11 files changed, 194 insertions(+) create mode 100644 llvm/cmake/modules/FindZSTD.cmake diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 53c4158866ed..45399dc0537e 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -438,6 +438,8 @@ endif() set(LLVM_ENABLE_ZLIB "ON" CACHE STRING "Use zlib for compression/decompression if available. Can be ON, OFF, or FORCE_ON") +set(LLVM_ENABLE_ZSTD "ON" CACHE STRING "Use zstd for compression/decompression if available. Can be ON, OFF, or FORCE_ON") + set(LLVM_ENABLE_CURL "OFF" CACHE STRING "Use libcurl for the HTTP client if available. Can be ON, OFF, or FORCE_ON") set(LLVM_ENABLE_HTTPLIB "OFF" CACHE STRING "Use cpp-httplib HTTP server library if available. 
Can be ON, OFF, or FORCE_ON") diff --git a/llvm/cmake/config-ix.cmake b/llvm/cmake/config-ix.cmake index 1d6743d9f460..4dd4fc938500 100644 --- a/llvm/cmake/config-ix.cmake +++ b/llvm/cmake/config-ix.cmake @@ -138,6 +138,35 @@ else() set(LLVM_ENABLE_ZLIB 0) endif() +if(LLVM_ENABLE_ZSTD) + if(LLVM_ENABLE_ZSTD STREQUAL FORCE_ON) + find_package(ZSTD REQUIRED) + elseif(NOT LLVM_USE_SANITIZER MATCHES "Memory.*") + find_package(ZSTD) + endif() + if(ZSTD_FOUND) + # Check if zstd we found is usable; for example, we may have found a 32-bit + # library on a 64-bit system which would result in a link-time failure. + cmake_push_check_state() + list(APPEND CMAKE_REQUIRED_INCLUDES ${ZSTD_INCLUDE_DIR}) + list(APPEND CMAKE_REQUIRED_LIBRARIES ${ZSTD_LIBRARY}) + check_symbol_exists(ZSTD_compress zstd.h HAVE_ZSTD) + cmake_pop_check_state() + else() + set(HAVE_ZSTD 0) + endif() + if(LLVM_ENABLE_ZSTD STREQUAL FORCE_ON AND NOT HAVE_ZSTD) + message(FATAL_ERROR "Failed to configure zstd") + endif() + if(HAVE_ZSTD) + set(LLVM_ENABLE_ZSTD 1) + else() + set(LLVM_ENABLE_ZSTD 0) + endif() +else() + set(LLVM_ENABLE_ZSTD 0) +endif() + if(LLVM_ENABLE_LIBXML2) if(LLVM_ENABLE_LIBXML2 STREQUAL FORCE_ON) find_package(LibXml2 REQUIRED) diff --git a/llvm/cmake/modules/FindZSTD.cmake b/llvm/cmake/modules/FindZSTD.cmake new file mode 100644 index 000000000000..43ccf7232138 --- /dev/null +++ b/llvm/cmake/modules/FindZSTD.cmake @@ -0,0 +1,21 @@ +find_path(ZSTD_INCLUDE_DIR + NAMES zstd.h + HINTS ${ZSTD_ROOT_DIR}/include) + +find_library(ZSTD_LIBRARY + NAMES zstd + HINTS ${ZSTD_ROOT_DIR}/lib) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args( + ZSTD DEFAULT_MSG + ZSTD_LIBRARY ZSTD_INCLUDE_DIR) + +if(ZSTD_FOUND) + set(ZSTD_LIBRARIES ${ZSTD_LIBRARY}) + set(ZSTD_INCLUDE_DIRS ${ZSTD_INCLUDE_DIR}) +endif() + +mark_as_advanced( + ZSTD_INCLUDE_DIR + ZSTD_LIBRARY) diff --git a/llvm/cmake/modules/LLVMConfig.cmake.in b/llvm/cmake/modules/LLVMConfig.cmake.in index d95fa919ef51..0a9d5c24252b 
100644 --- a/llvm/cmake/modules/LLVMConfig.cmake.in +++ b/llvm/cmake/modules/LLVMConfig.cmake.in @@ -73,6 +73,12 @@ if(LLVM_ENABLE_ZLIB) find_package(ZLIB) endif() +set(LLVM_ENABLE_ZSTD @LLVM_ENABLE_ZSTD@) +if(LLVM_ENABLE_ZSTD) + set(ZSTD_ROOT @ZSTD_ROOT@) + find_package(ZSTD) +endif() + set(LLVM_ENABLE_LIBXML2 @LLVM_ENABLE_LIBXML2@) if(LLVM_ENABLE_LIBXML2) find_package(LibXml2) diff --git a/llvm/include/llvm/Config/llvm-config.h.cmake b/llvm/include/llvm/Config/llvm-config.h.cmake index 3355f1233720..96fbe6f771d2 100644 --- a/llvm/include/llvm/Config/llvm-config.h.cmake +++ b/llvm/include/llvm/Config/llvm-config.h.cmake @@ -95,6 +95,9 @@ /* Define if zlib compression is available */ #cmakedefine01 LLVM_ENABLE_ZLIB +/* Define if zstd compression is available */ +#cmakedefine01 LLVM_ENABLE_ZSTD + /* Define if LLVM was built with a dependency to the libtensorflow dynamic library */ #cmakedefine LLVM_HAVE_TF_API diff --git a/llvm/include/llvm/Support/Compression.h b/llvm/include/llvm/Support/Compression.h index c99f811459ab..0beda6a516d5 100644 --- a/llvm/include/llvm/Support/Compression.h +++ b/llvm/include/llvm/Support/Compression.h @@ -44,6 +44,28 @@ Error uncompress(ArrayRef Input, } // End of namespace zlib +namespace zstd { + +constexpr int NoCompression = -5; +constexpr int BestSpeedCompression = 1; +constexpr int DefaultCompression = 5; +constexpr int BestSizeCompression = 12; + +bool isAvailable(); + +void compress(ArrayRef Input, + SmallVectorImpl &CompressedBuffer, + int Level = DefaultCompression); + +Error uncompress(ArrayRef Input, uint8_t *UncompressedBuffer, + size_t &UncompressedSize); + +Error uncompress(ArrayRef Input, + SmallVectorImpl &UncompressedBuffer, + size_t UncompressedSize); + +} // End of namespace zstd + } // End of namespace compression } // End of namespace llvm diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index e1045e459d70..52b95c5377d3 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ 
b/llvm/lib/Support/CMakeLists.txt @@ -25,6 +25,10 @@ if(LLVM_ENABLE_ZLIB) set(imported_libs ZLIB::ZLIB) endif() +if(LLVM_ENABLE_ZSTD) + list(APPEND imported_libs zstd) +endif() + if( MSVC OR MINGW ) # libuuid required for FOLDERID_Profile usage in lib/Support/Windows/Path.inc. # advapi32 required for CryptAcquireContextW in lib/Support/Windows/Path.inc. diff --git a/llvm/lib/Support/Compression.cpp b/llvm/lib/Support/Compression.cpp index 21191972fb8b..e8fb715aa770 100644 --- a/llvm/lib/Support/Compression.cpp +++ b/llvm/lib/Support/Compression.cpp @@ -20,6 +20,9 @@ #if LLVM_ENABLE_ZLIB #include #endif +#if LLVM_ENABLE_ZSTD +#include +#endif using namespace llvm; using namespace llvm::compression; @@ -100,3 +103,65 @@ Error zlib::uncompress(ArrayRef Input, llvm_unreachable("zlib::uncompress is unavailable"); } #endif + +#if LLVM_ENABLE_ZSTD + +bool zstd::isAvailable() { return true; } + +void zstd::compress(ArrayRef Input, + SmallVectorImpl &CompressedBuffer, int Level) { + unsigned long CompressedBufferSize = ::ZSTD_compressBound(Input.size()); + CompressedBuffer.resize_for_overwrite(CompressedBufferSize); + unsigned long CompressedSize = + ::ZSTD_compress((char *)CompressedBuffer.data(), CompressedBufferSize, + (const char *)Input.data(), Input.size(), Level); + if (ZSTD_isError(CompressedSize)) + report_bad_alloc_error("Allocation failed"); + // Tell MemorySanitizer that zstd output buffer is fully initialized. + // This avoids a false report when running LLVM with uninstrumented zstd. + __msan_unpoison(CompressedBuffer.data(), CompressedSize); + if (CompressedSize < CompressedBuffer.size()) + CompressedBuffer.truncate(CompressedSize); +} + +Error zstd::uncompress(ArrayRef Input, uint8_t *UncompressedBuffer, + size_t &UncompressedSize) { + const size_t Res = + ::ZSTD_decompress(UncompressedBuffer, UncompressedSize, + (const uint8_t *)Input.data(), Input.size()); + UncompressedSize = Res; + // Tell MemorySanitizer that zstd output buffer is fully initialized. 
+ // This avoids a false report when running LLVM with uninstrumented zstd. + __msan_unpoison(UncompressedBuffer, UncompressedSize); + return ZSTD_isError(Res) ? make_error(ZSTD_getErrorName(Res), + inconvertibleErrorCode()) + : Error::success(); +} + +Error zstd::uncompress(ArrayRef Input, + SmallVectorImpl &UncompressedBuffer, + size_t UncompressedSize) { + UncompressedBuffer.resize_for_overwrite(UncompressedSize); + Error E = + zstd::uncompress(Input, UncompressedBuffer.data(), UncompressedSize); + if (UncompressedSize < UncompressedBuffer.size()) + UncompressedBuffer.truncate(UncompressedSize); + return E; +} + +#else +bool zstd::isAvailable() { return false; } +void zstd::compress(ArrayRef Input, + SmallVectorImpl &CompressedBuffer, int Level) { + llvm_unreachable("zstd::compress is unavailable"); +} +Error zstd::uncompress(ArrayRef Input, uint8_t *UncompressedBuffer, + size_t &UncompressedSize) { + llvm_unreachable("zstd::uncompress is unavailable"); +} +Error zstd::uncompress(ArrayRef Input, + SmallVectorImpl &UncompressedBuffer, + size_t UncompressedSize) { + llvm_unreachable("zstd::uncompress is unavailable"); +} +#endif diff --git a/llvm/test/lit.site.cfg.py.in b/llvm/test/lit.site.cfg.py.in index 520a54bc108f..09210e2e56d4 100644 --- a/llvm/test/lit.site.cfg.py.in +++ b/llvm/test/lit.site.cfg.py.in @@ -37,6 +37,7 @@ config.host_ldflags = '@HOST_LDFLAGS@' config.llvm_use_intel_jitevents = @LLVM_USE_INTEL_JITEVENTS@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@" config.have_zlib = @LLVM_ENABLE_ZLIB@ +config.have_zstd = @LLVM_ENABLE_ZSTD@ config.have_libxar = @LLVM_HAVE_LIBXAR@ config.have_libxml2 = @LLVM_ENABLE_LIBXML2@ config.have_curl = @LLVM_ENABLE_CURL@ diff --git a/llvm/unittests/Support/CompressionTest.cpp b/llvm/unittests/Support/CompressionTest.cpp index 8e6189ebe224..a89dadf5f9ae 100644 --- a/llvm/unittests/Support/CompressionTest.cpp +++ b/llvm/unittests/Support/CompressionTest.cpp @@ -61,4 +61,42 @@ TEST(CompressionTest, Zlib) { } #endif 
+#if LLVM_ENABLE_ZSTD +static void testZstdCompression(StringRef Input, int Level) { + SmallVector Compressed; + SmallVector Uncompressed; + zstd::compress(arrayRefFromStringRef(Input), Compressed, Level); + + // Check that uncompressed buffer is the same as original. + Error E = zstd::uncompress(Compressed, Uncompressed, Input.size()); + consumeError(std::move(E)); + + EXPECT_EQ(Input, toStringRef(Uncompressed)); + if (Input.size() > 0) { + // Uncompression fails if expected length is too short. + E = zstd::uncompress(Compressed, Uncompressed, Input.size() - 1); + EXPECT_EQ("Destination buffer is too small", llvm::toString(std::move(E))); + } +} + +TEST(CompressionTest, Zstd) { + testZstdCompression("", zstd::DefaultCompression); + + testZstdCompression("hello, world!", zstd::NoCompression); + testZstdCompression("hello, world!", zstd::BestSizeCompression); + testZstdCompression("hello, world!", zstd::BestSpeedCompression); + testZstdCompression("hello, world!", zstd::DefaultCompression); + + const size_t kSize = 1024; + char BinaryData[kSize]; + for (size_t i = 0; i < kSize; ++i) + BinaryData[i] = i & 255; + StringRef BinaryDataStr(BinaryData, kSize); + + testZstdCompression(BinaryDataStr, zstd::NoCompression); + testZstdCompression(BinaryDataStr, zstd::BestSizeCompression); + testZstdCompression(BinaryDataStr, zstd::BestSpeedCompression); + testZstdCompression(BinaryDataStr, zstd::DefaultCompression); +} +#endif } diff --git a/utils/bazel/llvm_configs/llvm-config.h.cmake b/utils/bazel/llvm_configs/llvm-config.h.cmake index 3355f1233720..96fbe6f771d2 100644 --- a/utils/bazel/llvm_configs/llvm-config.h.cmake +++ b/utils/bazel/llvm_configs/llvm-config.h.cmake @@ -95,6 +95,9 @@ /* Define if zlib compression is available */ #cmakedefine01 LLVM_ENABLE_ZLIB +/* Define if zstd compression is available */ +#cmakedefine01 LLVM_ENABLE_ZSTD + /* Define if LLVM was built with a dependency to the libtensorflow dynamic library */ #cmakedefine LLVM_HAVE_TF_API