Use Smallz4 instead of liblz4 which is optional ##build
* Initial smallz4 support, related to #19849 * Patch smallz4 to return -1 on error (as opposed to just exit) * Add new function 'unlz4Block_userPtr' that can decompress an lz4 block * New '--with-smallz4' flag to compile and use smallz4 instead of lz4 * Reuse the new unlz4Block_userPtr function to remove duplicate code from unlz4_userPtr in smallz4 * smallz4: remove patch, format smallz4cat.c properly
This commit is contained in:
parent
09169f97f7
commit
b06cfb30d8
|
@ -102,6 +102,7 @@ LIBZIP=@LIBZIP@
|
|||
LIBXXHASH=@LIBXXHASH@
|
||||
|
||||
USE_SYSLZ4=@USE_SYSLZ4@
|
||||
USE_SMALLZ4=@USE_SMALLZ4@
|
||||
|
||||
ifeq ($(HAVE_LIB_SSL),1)
|
||||
SSL_CFLAGS=@SSL_CFLAGS@
|
||||
|
|
|
@ -35,6 +35,7 @@ USE_CS5=0
|
|||
USE_CS4=0
|
||||
WITH_CAPSTONE=0
|
||||
WITH_SYSLZ4=0
|
||||
WITH_SMALLZ4=0
|
||||
USE_ZIP=0
|
||||
USE_XXHASH=0
|
||||
WITH_GPL=1
|
||||
|
@ -188,8 +189,7 @@ System types:
|
|||
--target=TARGET configure for building compilers for TARGET [HOST]
|
||||
EOF2
|
||||
|
||||
printf "
|
||||
Optional Features:
|
||||
printf "\nOptional Features:
|
||||
--disable-debugger disable native debugger features
|
||||
--with-sysmagic force to use system's magic
|
||||
--disable-threads disable use of thread apis
|
||||
|
@ -207,6 +207,7 @@ Optional Features:
|
|||
--with-capstone4 build v4 branch of capstone
|
||||
--with-syscapstone force to use system-wide capstone
|
||||
--with-syslz4 force to use system's liblz4
|
||||
--with-smallz4 build with smallz4 use it as the lz4 decompressor
|
||||
--with-syszip force to use system's libzip and zlib
|
||||
--with-sysxxhash force to use system's xxhash
|
||||
--without-gpl do not build GPL code (grub, cxx, ... )
|
||||
|
@ -218,20 +219,16 @@ Optional Features:
|
|||
--with-ostype Choose OS ( android windows wsl mingw32 bsd solaris gnulinux darwin haiku ) (USEROSTYPE=auto)
|
||||
--with-libversion specify different libversion (LIBVERSION=xxx)
|
||||
--without-jemalloc build without jemalloc
|
||||
--with-checks-level value between 0 and 3 to enable different level of assert (see R_CHECKS_LEVEL) (R_CHECKS_LEVEL=2)
|
||||
"
|
||||
printf "
|
||||
Some influential environment variables:
|
||||
--with-checks-level value between 0 and 3 to enable different level of assert (see R_CHECKS_LEVEL) (R_CHECKS_LEVEL=2)\n"
|
||||
printf "\nSome influential environment variables:
|
||||
CC C compiler command
|
||||
CFLAGS C compiler flags
|
||||
LDFLAGS linker flags, e.g. -L<lib dir> if you have libraries in a
|
||||
nonstandard directory <lib dir>
|
||||
CPPFLAGS C/C++ preprocessor flags, e.g. -I<include dir> if you have
|
||||
headers in a nonstandard directory <include dir>
|
||||
CPP C preprocessor
|
||||
"
|
||||
printf "
|
||||
Report bugs to: pancake <pancake@nopcode.org>"
|
||||
CPP C preprocessor\n"
|
||||
printf "\nReport bugs to: pancake <pancake@nopcode.org>"
|
||||
echo ""
|
||||
exit 0
|
||||
}
|
||||
|
@ -295,7 +292,7 @@ echo "LANGS: c"
|
|||
echo "REQUIRED: libdl"
|
||||
echo "OPTIONAL: libmagic libz libzip libxxhash libssl liblibuv>=1.0.0"
|
||||
echo "PKGCONFIG: capstone liblz4 openssl libuv"
|
||||
echo "FLAGS: --disable-debugger --with-sysmagic --disable-threads --disable-loadlibs --enable-threadsafety --without-dylink --without-fork --without-ptrace-wrap --without-gperf --without-capstone --with-new-io-cache --with-libr --with-static-themes --with-capstone5 --with-capstone4 --with-syscapstone --with-syslz4 --with-syszip --with-sysxxhash --without-gpl --with-ssl --with-ssl-crypto --with-libuv --with-rpath --with-compiler=gcc --with-ostype=auto --with-libversion=xxx --without-jemalloc --with-checks-level=2"
|
||||
echo "FLAGS: --disable-debugger --with-sysmagic --disable-threads --disable-loadlibs --enable-threadsafety --without-dylink --without-fork --without-ptrace-wrap --without-gperf --without-capstone --with-new-io-cache --with-libr --with-static-themes --with-capstone5 --with-capstone4 --with-syscapstone --with-syslz4 --with-smallz4 --with-syszip --with-sysxxhash --without-gpl --with-ssl --with-ssl-crypto --with-libuv --with-rpath --with-compiler=gcc --with-ostype=auto --with-libversion=xxx --without-jemalloc --with-checks-level=2"
|
||||
exit 0
|
||||
;;
|
||||
--cache-file)
|
||||
|
@ -364,6 +361,7 @@ echo "FLAGS: --disable-debugger --with-sysmagic --disable-threads --disabl
|
|||
"--with-capstone4") USE_CS4="1"; ;;
|
||||
"--with-syscapstone") WITH_CAPSTONE="1"; ;;
|
||||
"--with-syslz4") WITH_SYSLZ4="1"; ;;
|
||||
"--with-smallz4") WITH_SMALLZ4="1"; ;;
|
||||
"--with-syszip") USE_ZIP="1"; ;;
|
||||
"--with-sysxxhash") USE_XXHASH="1"; ;;
|
||||
"--without-gpl") WITH_GPL="0"; ;;
|
||||
|
@ -393,7 +391,7 @@ parse_options "$1"
|
|||
shift
|
||||
done
|
||||
|
||||
ENVWORDS="MANDIR DESCRIPTION INFODIR LIBDIR INCLUDEDIR LOCALSTATEDIR ETCDIR SYSCONFDIR DATADIR DOCDIR LIBEXECDIR SBINDIR BINDIR EPREFIX PREFIX SPREFIX TARGET HOST BUILD INSTALL INSTALL_LIB INSTALL_MAN INSTALL_PROGRAM INSTALL_PROGRAM_STRIP INSTALL_DIR INSTALL_SCRIPT INSTALL_DATA HOST_OS HOST_CPU BUILD_OS BUILD_CPU TARGET_OS TARGET_CPU VERSION VERSION_MAJOR VERSION_MINOR VERSION_PATCH VERSION_NUMBER PKGCFG_LIBDIR PKGCFG_INCDIR PKGNAME VPATH CONTACT CONTACT_NAME CONTACT_MAIL CC CFLAGS CPPFLAGS LDFLAGS HAVE_LANG_C DEBUGGER HAVE_LIB_DL DL_LIBS PKGCONFIG HAVE_PATCH PATCH HAVE_AR AR HAVE_GIT GIT HAVE_GPERF GPERF HAVE_LIB_MAGIC HAVE_LINUX_CAN_H USE_MAGIC USE_LIB_MAGIC LIBMAGIC WANT_THREADS LOADLIBS R_CRITICAL_ENABLED WANT_DYLINK HAVE_FORK WANT_PTRACE_WRAP WANT_GPERF WANT_CAPSTONE NEW_IO_CACHE WITH_LIBR WITH_STATIC_THEMES USE_CS5 USE_CS4 WITH_CAPSTONE CAPSTONE_CFLAGS CAPSTONE_LDFLAGS HAVE_PKGCFG_CAPSTONE USE_CAPSTONE LZ4_CFLAGS LZ4_LDFLAGS HAVE_PKGCFG_LIBLZ4 WITH_SYSLZ4 USE_SYSLZ4 HAVE_LIB_Z HAVE_LIB_ZIP USE_ZIP USE_LIB_ZIP LIBZIP HAVE_LIB_XXHASH USE_XXHASH USE_LIB_XXHASH LIBXXHASH WITH_GPL HAVE_DECL_ADDR_NO_RANDOMIZE HAVE_DECL___GLIBC__ HAVE_ARC4RANDOM_UNIFORM HAVE_EXPLICIT_BZERO HAVE_EXPLICIT_MEMSET HAVE_CLOCK_NANOSLEEP HAVE_SIGACTION HAVE_CLOCK_GETTIME CLOCK_LDFLAGS SUPPORT_GNU99 HAVE_LIB_GMP HAVE_LIB_SSL SSL_CFLAGS SSL_LDFLAGS HAVE_PKGCFG_OPENSSL HAVE_OPENSSL WANT_SSL WANT_SSL_CRYPTO WANT_LIBUV HAVE_LIBUV_VERSION_1_0_0 LIBUV_CFLAGS LIBUV_LDFLAGS HAVE_PKGCFG_LIBUV HAVE_LIBUV USE_RPATH USERCC USEROSTYPE LIBVERSION HAVE_JEMALLOC HAVE_PTRACE USE_PTRACE_WRAP R_CHECKS_LEVEL"
|
||||
ENVWORDS="MANDIR DESCRIPTION INFODIR LIBDIR INCLUDEDIR LOCALSTATEDIR ETCDIR SYSCONFDIR DATADIR DOCDIR LIBEXECDIR SBINDIR BINDIR EPREFIX PREFIX SPREFIX TARGET HOST BUILD INSTALL INSTALL_LIB INSTALL_MAN INSTALL_PROGRAM INSTALL_PROGRAM_STRIP INSTALL_DIR INSTALL_SCRIPT INSTALL_DATA HOST_OS HOST_CPU BUILD_OS BUILD_CPU TARGET_OS TARGET_CPU VERSION VERSION_MAJOR VERSION_MINOR VERSION_PATCH VERSION_NUMBER PKGCFG_LIBDIR PKGCFG_INCDIR PKGNAME VPATH CONTACT CONTACT_NAME CONTACT_MAIL CC CFLAGS CPPFLAGS LDFLAGS HAVE_LANG_C DEBUGGER HAVE_LIB_DL DL_LIBS PKGCONFIG HAVE_PATCH PATCH HAVE_AR AR HAVE_GIT GIT HAVE_GPERF GPERF HAVE_LIB_MAGIC HAVE_LINUX_CAN_H USE_MAGIC USE_LIB_MAGIC LIBMAGIC WANT_THREADS LOADLIBS R_CRITICAL_ENABLED WANT_DYLINK HAVE_FORK WANT_PTRACE_WRAP WANT_GPERF WANT_CAPSTONE NEW_IO_CACHE WITH_LIBR WITH_STATIC_THEMES USE_CS5 USE_CS4 WITH_CAPSTONE CAPSTONE_CFLAGS CAPSTONE_LDFLAGS HAVE_PKGCFG_CAPSTONE USE_CAPSTONE LZ4_CFLAGS LZ4_LDFLAGS HAVE_PKGCFG_LIBLZ4 WITH_SYSLZ4 USE_SYSLZ4 WITH_SMALLZ4 USE_SMALLZ4 HAVE_LIB_Z HAVE_LIB_ZIP USE_ZIP USE_LIB_ZIP LIBZIP HAVE_LIB_XXHASH USE_XXHASH USE_LIB_XXHASH LIBXXHASH WITH_GPL HAVE_DECL_ADDR_NO_RANDOMIZE HAVE_DECL___GLIBC__ HAVE_ARC4RANDOM_UNIFORM HAVE_EXPLICIT_BZERO HAVE_EXPLICIT_MEMSET HAVE_CLOCK_NANOSLEEP HAVE_SIGACTION HAVE_CLOCK_GETTIME CLOCK_LDFLAGS SUPPORT_GNU99 HAVE_LIB_GMP HAVE_LIB_SSL SSL_CFLAGS SSL_LDFLAGS HAVE_PKGCFG_OPENSSL HAVE_OPENSSL WANT_SSL WANT_SSL_CRYPTO WANT_LIBUV HAVE_LIBUV_VERSION_1_0_0 LIBUV_CFLAGS LIBUV_LDFLAGS HAVE_PKGCFG_LIBUV HAVE_LIBUV USE_RPATH USERCC USEROSTYPE LIBVERSION HAVE_JEMALLOC HAVE_PTRACE USE_PTRACE_WRAP R_CHECKS_LEVEL"
|
||||
|
||||
create_environ
|
||||
|
||||
|
@ -665,6 +663,10 @@ if [ 11 = "$WITH_SYSLZ4$LZ4_CFLAGS" ]; then
|
|||
USE_SYSLZ4="1"
|
||||
else
|
||||
USE_SYSLZ4="0"; fi
|
||||
if [ "$WITH_SMALLZ4" = "1" ]; then
|
||||
USE_SMALLZ4="1"
|
||||
else
|
||||
USE_SMALLZ4="0"; fi
|
||||
check_library HAVE_LIB_Z z 0
|
||||
check_library HAVE_LIB_ZIP zip 0
|
||||
if [ 11 = "$HAVE_LIB_Z$USE_ZIP" ]; then
|
||||
|
@ -979,7 +981,7 @@ do_remove
|
|||
if [ "$QUIET" = 0 ]; then
|
||||
echo
|
||||
echo "Final report:"
|
||||
for A in BUILD CC CFLAGS DEBUGGER HAVE_ARC4RANDOM_UNIFORM HAVE_EXPLICIT_BZERO HAVE_EXPLICIT_MEMSET HAVE_FORK HAVE_GPERF HAVE_LIBUV HAVE_LIB_GMP HAVE_OPENSSL WANT_SSL_CRYPTO HAVE_PTRACE HOST LDFLAGS LIBVERSION PKGCONFIG PREFIX R_CHECKS_LEVEL TARGET USERCC USEROSTYPE USE_CAPSTONE USE_LIB_MAGIC NEW_IO_CACHE USE_LIB_XXHASH USE_LIB_ZIP USE_PTRACE_WRAP USE_SYSLZ4 VERSION WANT_DYLINK ; do
|
||||
for A in BUILD CC CFLAGS DEBUGGER HAVE_ARC4RANDOM_UNIFORM HAVE_EXPLICIT_BZERO HAVE_EXPLICIT_MEMSET HAVE_FORK HAVE_GPERF HAVE_LIBUV HAVE_LIB_GMP HAVE_OPENSSL WANT_SSL_CRYPTO HAVE_PTRACE HOST LDFLAGS LIBVERSION PKGCONFIG PREFIX R_CHECKS_LEVEL TARGET USERCC USEROSTYPE USE_CAPSTONE USE_LIB_MAGIC NEW_IO_CACHE USE_LIB_XXHASH USE_LIB_ZIP USE_PTRACE_WRAP USE_SYSLZ4 VERSION WANT_DYLINK USE_SMALLZ4 ; do
|
||||
eval VAL="\$${A}"
|
||||
[ -z "${VAL}" ] && VAL="\"\""
|
||||
echo " - ${A} = ${VAL}"
|
||||
|
|
|
@ -73,6 +73,14 @@ IFAND WITH_SYSLZ4 LZ4_CFLAGS {
|
|||
|
||||
}
|
||||
|
||||
(( SMALLZ4 ))
|
||||
ARG_WITH WITH_SMALLZ4 smallz4 build with smallz4 use it as the lz4 decompressor ;
|
||||
IF WITH_SMALLZ4 {
|
||||
USE_SMALLZ4 = 1 ;
|
||||
}{
|
||||
USE_SMALLZ4 = 0 ;
|
||||
}
|
||||
|
||||
(( ZIP ))
|
||||
CHKLIB z
|
||||
CHKLIB zip
|
||||
|
@ -283,7 +291,7 @@ REPORT
|
|||
HAVE_EXPLICIT_MEMSET HAVE_FORK HAVE_GPERF HAVE_LIBUV HAVE_LIB_GMP
|
||||
HAVE_OPENSSL WANT_SSL_CRYPTO HAVE_PTRACE HOST LDFLAGS LIBVERSION PKGCONFIG PREFIX
|
||||
R_CHECKS_LEVEL TARGET USERCC USEROSTYPE USE_CAPSTONE USE_LIB_MAGIC NEW_IO_CACHE
|
||||
USE_LIB_XXHASH USE_LIB_ZIP USE_PTRACE_WRAP USE_SYSLZ4 VERSION WANT_DYLINK
|
||||
USE_LIB_XXHASH USE_LIB_ZIP USE_PTRACE_WRAP USE_SYSLZ4 VERSION WANT_DYLINK USE_SMALLZ4
|
||||
;
|
||||
|
||||
PKGCFG_DO pkgcfg/r_egg.pc r_egg r_asm r_syscall r_util r_reg r_flag r_cons ;
|
||||
|
|
|
@ -37,6 +37,11 @@ OBJS+=big_gmp.o
|
|||
OBJS+=big_ssl.o
|
||||
OBJS+=big.o
|
||||
|
||||
ifeq ($(USE_SMALLZ4),1)
|
||||
CFLAGS+=-DUSE_SMALLZ4
|
||||
OBJS+=$(SHLR)/smallz4/smallz4cat.o
|
||||
endif
|
||||
|
||||
CWD=$(shell pwd)
|
||||
|
||||
LDFLAGS+=${BN_LIBS}
|
||||
|
|
|
@ -7,6 +7,36 @@
|
|||
// set a maximum output buffer of 50MB
|
||||
#define MAXOUT 50000000
|
||||
|
||||
#if USE_SMALLZ4
|
||||
#include "../../../shlr/smallz4/smallz4cat.h"
|
||||
|
||||
struct UserPtr {
|
||||
const ut8 * input;
|
||||
ut64 inputPos;
|
||||
ut8 * output;
|
||||
ut64 outputPos;
|
||||
ut32 * outputSize;
|
||||
int error;
|
||||
};
|
||||
|
||||
void smallz4Write(const unsigned char* data, unsigned int numBytes, void *userPtr) {
|
||||
struct UserPtr* user = (struct UserPtr*)userPtr;
|
||||
if (data != NULL && numBytes > 0) {
|
||||
if (*(user->outputSize) - user->outputPos < numBytes) {
|
||||
user->error = -1;
|
||||
return;
|
||||
}
|
||||
memcpy(user->output + user->outputPos, data, numBytes);
|
||||
user->outputPos += numBytes;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned char smallz4GetByte(void *userPtr) {
|
||||
struct UserPtr* user = (struct UserPtr*)userPtr;
|
||||
return *(user->input + (user->inputPos++));
|
||||
}
|
||||
#endif
|
||||
|
||||
static const char *gzerr(int n) {
|
||||
const char * const errors[] = {
|
||||
"",
|
||||
|
@ -89,8 +119,23 @@ R_API ut8 *r_inflate_lz4(const ut8 *src, int srcLen, int *consumed, int *dstLen)
|
|||
if (!obuf) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if USE_SMALLZ4
|
||||
struct UserPtr user =
|
||||
{
|
||||
.input = src,
|
||||
.inputPos = 0,
|
||||
.output = obuf,
|
||||
.outputPos = 0,
|
||||
.outputSize = &osz,
|
||||
.error = 0
|
||||
};
|
||||
int res = unlz4Block_userPtr (smallz4GetByte, smallz4Write, &user, srcLen, NULL, NULL);
|
||||
if (res < 1 || user.error != 0) {
|
||||
#else
|
||||
int res = LZ4_decompress_safe ((const char*)src, (char*)obuf, (uint32_t) srcLen, (uint32_t) osz);
|
||||
if (res < 1) {
|
||||
#endif
|
||||
int mul = srcLen / -res;
|
||||
int nosz = osz * (5 * (mul + 1));
|
||||
if (nosz < osz) {
|
||||
|
@ -104,8 +149,19 @@ R_API ut8 *r_inflate_lz4(const ut8 *src, int srcLen, int *consumed, int *dstLen)
|
|||
}
|
||||
obuf = nbuf;
|
||||
osz = nosz;
|
||||
#if USE_SMALLZ4
|
||||
user.output = obuf;
|
||||
user.inputPos = 0;
|
||||
user.outputPos = 0;
|
||||
user.error = 0;
|
||||
res = unlz4Block_userPtr (smallz4GetByte, smallz4Write, &user, srcLen, NULL, NULL);
|
||||
}
|
||||
user.output = NULL;
|
||||
user.input = NULL;
|
||||
#else
|
||||
}
|
||||
res = LZ4_decompress_safe ((const char*)src, (char*)obuf, (uint32_t) srcLen, (uint32_t) osz);
|
||||
#endif
|
||||
if (res > 0) {
|
||||
*dstLen = res;
|
||||
*consumed = srcLen;
|
||||
|
|
|
@ -0,0 +1,10 @@
|
|||
# Standalone rules for building the bundled smallz4cat decompressor object.
CFLAGS = -O2 -Wall -pedantic -s -std=c99

# Compile with CFLAGS (defined above). CCFLAGS is not defined in this file; it is
# still expanded so that a caller who passes it on the command line keeps working.
# The source is listed as a prerequisite so that editing it triggers a rebuild.
smallz4cat.o: smallz4cat.c
	$(CC) $(CFLAGS) $(CCFLAGS) $(EXTRAFLAGS) -c smallz4cat.c

all32: CFLAGS+=-m32

.PHONY: clean
clean:
	@$(RM) *.o
|
|
@ -0,0 +1,435 @@
|
|||
// //////////////////////////////////////////////////////////
|
||||
// smallz4cat.c
|
||||
// Copyright (c) 2016-2019 Stephan Brumme. All rights reserved.
|
||||
// see https://create.stephan-brumme.com/smallz4/
|
||||
//
|
||||
// "MIT License":
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
// of this software and associated documentation files (the "Software"),
|
||||
// to deal in the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the Software
|
||||
// is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included
|
||||
// in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
|
||||
// INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
||||
// PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
||||
// OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
// This program is a shorter, more readable, albeit slower re-implementation of lz4cat ( https://github.com/lz4/lz4 )
|
||||
|
||||
// compile: gcc smallz4cat.c -O3 -o smallz4cat -Wall -pedantic -std=c99 -s
|
||||
// The static 8k binary was compiled using Clang and dietlibc (see https://www.fefe.de/dietlibc/ )
|
||||
|
||||
// Limitations:
|
||||
// - skippable frames and legacy frames are not implemented (and most likely never will)
|
||||
// - checksums are not verified (see https://create.stephan-brumme.com/xxhash/ for a simple implementation)
|
||||
|
||||
// Replace getByteFromIn() and sendBytesToOut() by your own code if you need in-memory LZ4 decompression.
|
||||
// Corrupted data causes a call to unlz4error().
|
||||
|
||||
// suppress warnings when compiled by Visual C++
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
|
||||
#include <stdio.h> // stdin/stdout/stderr, fopen, ...
|
||||
#include <stdlib.h> // exit()
|
||||
#include <string.h> // memcpy
|
||||
|
||||
#ifndef FALSE
|
||||
#define FALSE 0
|
||||
#define TRUE 1
|
||||
#endif
|
||||
|
||||
/// error handler: writes "ERROR: ", the message and a newline to stderr
static void unlz4error(const char* msg)
{
	// plain fputs calls give a smaller static binary than fprintf(stderr, "ERROR: %s\n", msg);
	const char *const pieces[] = { "ERROR: ", msg, "\n" };
	for (unsigned int i = 0; i < sizeof pieces / sizeof pieces[0]; i++)
		fputs (pieces[i], stderr);
}
|
||||
|
||||
|
||||
// ==================== I/O INTERFACE ====================
|
||||
|
||||
|
||||
/// callback returning the next input byte, see getByteFromIn() for a basic implementation
typedef unsigned char (*GET_BYTE)(void *userPtr);
/// callback receiving a run of decoded bytes, see sendBytesToOut() for a basic implementation
typedef void (*SEND_BYTES)(const unsigned char *data, unsigned int numBytes, void *userPtr);

/// declared ahead of its definition because unlz4_userPtr() calls it first
int unlz4Block_userPtr (GET_BYTE getByte, SEND_BYTES sendBytes, void *userPtr,
	unsigned int blockSize, unsigned int *position, unsigned char *hist);
|
||||
|
||||
// modify input buffer size as you like ... for most use cases, bigger buffer aren't faster anymore - and even reducing to 1 byte works !
// Parenthesized so the macro expands as one value next to operators such as / or %.
#define READ_BUFFER_SIZE (4 * 1024)

/// state shared by getByteFromIn() and sendBytesToOut() through their userPtr argument
struct UserPtr
{
	// file handles
	FILE *in;
	FILE *out;
	// buffered input: readBuffer holds `available` bytes, `pos` is the next one to hand out
	unsigned char readBuffer[READ_BUFFER_SIZE];
	unsigned int pos;
	unsigned int available;
};
|
||||
|
||||
/// read a single byte (with simple buffering)
|
||||
static unsigned char getByteFromIn(void* userPtr) // parameter "userPtr" not needed
|
||||
{
|
||||
/// cast user-specific data
|
||||
struct UserPtr *user = (struct UserPtr *)userPtr;
|
||||
|
||||
// refill buffer
|
||||
if (user->pos == user->available) {
|
||||
user->pos = 0;
|
||||
user->available = fread (user->readBuffer, 1, READ_BUFFER_SIZE, user->in);
|
||||
if (user->available == 0)
|
||||
unlz4error ("out of data");
|
||||
}
|
||||
|
||||
// return a byte
|
||||
return user->readBuffer[user->pos++];
|
||||
}
|
||||
|
||||
/// write a block of bytes
|
||||
static void sendBytesToOut(const unsigned char* data, unsigned int numBytes, void* userPtr)
|
||||
{
|
||||
/// cast user-specific data
|
||||
struct UserPtr *user = (struct UserPtr *)userPtr;
|
||||
if (data != NULL && numBytes > 0)
|
||||
fwrite (data, 1, numBytes, user->out);
|
||||
}
|
||||
|
||||
|
||||
// ==================== LZ4 DECOMPRESSOR ====================
|
||||
|
||||
|
||||
/// decompress everything in input stream (accessed via getByte) and write to output stream (via sendBytes)
|
||||
int unlz4_userPtr(GET_BYTE getByte, SEND_BYTES sendBytes, const char* dictionary, void* userPtr)
|
||||
{
|
||||
// signature
|
||||
unsigned char signature1 = getByte (userPtr);
|
||||
unsigned char signature2 = getByte (userPtr);
|
||||
unsigned char signature3 = getByte (userPtr);
|
||||
unsigned char signature4 = getByte (userPtr);
|
||||
unsigned int signature = (signature4 << 24) | (signature3 << 16) | (signature2 << 8) | signature1;
|
||||
unsigned char isModern = (signature == 0x184D2204);
|
||||
unsigned char isLegacy = (signature == 0x184C2102);
|
||||
if (!isModern && !isLegacy) {
|
||||
unlz4error ("invalid signature");
|
||||
return -1;
|
||||
}
|
||||
|
||||
unsigned char hasBlockChecksum = FALSE;
|
||||
unsigned char hasContentSize = FALSE;
|
||||
unsigned char hasContentChecksum = FALSE;
|
||||
unsigned char hasDictionaryID = FALSE;
|
||||
if (isModern) {
|
||||
// flags
|
||||
unsigned char flags = getByte (userPtr);
|
||||
hasBlockChecksum = flags & 16;
|
||||
hasContentSize = flags & 8;
|
||||
hasContentChecksum = flags & 4;
|
||||
hasDictionaryID = flags & 1;
|
||||
|
||||
// only version 1 file format
|
||||
unsigned char version = flags >> 6;
|
||||
if (version != 1) {
|
||||
unlz4error ("only LZ4 file format version 1 supported");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// ignore blocksize
|
||||
char numIgnore = 1;
|
||||
|
||||
// ignore, skip 8 bytes
|
||||
if (hasContentSize)
|
||||
numIgnore += 8;
|
||||
// ignore, skip 4 bytes
|
||||
if (hasDictionaryID)
|
||||
numIgnore += 4;
|
||||
|
||||
// ignore header checksum (xxhash32 of everything up this point & 0xFF)
|
||||
numIgnore++;
|
||||
|
||||
// skip all those ignored bytes
|
||||
while (numIgnore--)
|
||||
getByte (userPtr);
|
||||
}
|
||||
|
||||
// don't lower this value, backreferences can be 64kb far away
|
||||
#define HISTORY_SIZE 64 * 1024
|
||||
// contains the latest decoded data
|
||||
unsigned char history[HISTORY_SIZE];
|
||||
// next free position in history[]
|
||||
unsigned int pos = 0;
|
||||
|
||||
// dictionary compression is a recently introduced feature, just move its contents to the buffer
|
||||
if (dictionary != NULL) {
|
||||
// open dictionary
|
||||
FILE *dict = fopen (dictionary, "rb");
|
||||
if (!dict) {
|
||||
unlz4error ("cannot open dictionary");
|
||||
return -1;
|
||||
}
|
||||
|
||||
// get dictionary's filesize
|
||||
fseek (dict, 0, SEEK_END);
|
||||
long dictSize = ftell (dict);
|
||||
// only the last 64k are relevant
|
||||
long relevant = dictSize < 65536? 0: dictSize - 65536;
|
||||
fseek (dict, relevant, SEEK_SET);
|
||||
if (dictSize > 65536)
|
||||
dictSize = 65536;
|
||||
// read it and store it at the end of the buffer
|
||||
fread (history + HISTORY_SIZE - dictSize, 1, dictSize, dict);
|
||||
fclose (dict);
|
||||
}
|
||||
|
||||
// parse all blocks until blockSize == 0
|
||||
while (1) {
|
||||
// block size
|
||||
unsigned int blockSize = getByte (userPtr);
|
||||
blockSize |= (unsigned int)getByte (userPtr) << 8;
|
||||
blockSize |= (unsigned int)getByte (userPtr) << 16;
|
||||
blockSize |= (unsigned int)getByte (userPtr) << 24;
|
||||
|
||||
// highest bit set ?
|
||||
unsigned char isCompressed = isLegacy || (blockSize & 0x80000000) == 0;
|
||||
if (isModern)
|
||||
blockSize &= 0x7FFFFFFF;
|
||||
|
||||
// stop after last block
|
||||
if (blockSize == 0)
|
||||
break;
|
||||
|
||||
if (isCompressed) {
|
||||
// decompress block
|
||||
|
||||
int numWritten = unlz4Block_userPtr (getByte, sendBytes, userPtr, blockSize, &pos, history);
|
||||
|
||||
// all legacy blocks must be completely filled - except for the last one
|
||||
if (isLegacy && numWritten + pos < 8 * 1024 * 1024)
|
||||
break;
|
||||
} else {
|
||||
// copy uncompressed data and add to history, too (if next block is compressed and some matches refer to this block)
|
||||
while (blockSize-- > 0) {
|
||||
// copy a byte ...
|
||||
history[pos++] = getByte (userPtr);
|
||||
// ... until buffer is full => send to output
|
||||
if (pos == HISTORY_SIZE) {
|
||||
sendBytes (history, HISTORY_SIZE, userPtr);
|
||||
pos = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (hasBlockChecksum) {
|
||||
// ignore checksum, skip 4 bytes
|
||||
getByte (userPtr);
|
||||
getByte (userPtr);
|
||||
getByte (userPtr);
|
||||
getByte (userPtr);
|
||||
}
|
||||
}
|
||||
|
||||
if (hasContentChecksum) {
|
||||
// ignore checksum, skip 4 bytes
|
||||
getByte (userPtr);
|
||||
getByte (userPtr);
|
||||
getByte (userPtr);
|
||||
getByte (userPtr);
|
||||
}
|
||||
|
||||
// flush output buffer
|
||||
sendBytes (history, pos, userPtr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// decompress an lz4 block
|
||||
int unlz4Block_userPtr (GET_BYTE getByte, SEND_BYTES sendBytes, void *userPtr, unsigned int blockSize, unsigned int *position, unsigned char *hist) {
|
||||
|
||||
// contains the latest decoded data
|
||||
unsigned char history[HISTORY_SIZE];
|
||||
// next free position in history[]
|
||||
unsigned int pos = 0;
|
||||
|
||||
// better way to do this?
|
||||
if (position != NULL && hist != NULL) {
|
||||
pos = *position;
|
||||
memcpy (history, hist, pos);
|
||||
}
|
||||
|
||||
// decompress block
|
||||
unsigned int blockOffset = 0;
|
||||
unsigned int numWritten = 0;
|
||||
while (blockOffset < blockSize) {
|
||||
// get a token
|
||||
unsigned char token = getByte (userPtr);
|
||||
blockOffset++;
|
||||
|
||||
// determine number of literals
|
||||
unsigned int numLiterals = token >> 4;
|
||||
if (numLiterals == 15) {
|
||||
// number of literals length encoded in more than 1 byte
|
||||
unsigned char current;
|
||||
do {
|
||||
current = getByte (userPtr);
|
||||
numLiterals += current;
|
||||
blockOffset++;
|
||||
} while (current == 255);
|
||||
}
|
||||
|
||||
blockOffset += numLiterals;
|
||||
|
||||
// copy all those literals
|
||||
if (pos + numLiterals < HISTORY_SIZE) {
|
||||
// fast loop
|
||||
while (numLiterals-- > 0)
|
||||
history[pos++] = getByte (userPtr);
|
||||
} else {
|
||||
// slow loop
|
||||
while (numLiterals-- > 0) {
|
||||
history[pos++] = getByte (userPtr);
|
||||
|
||||
// flush output buffer
|
||||
if (pos == HISTORY_SIZE) {
|
||||
sendBytes (history, HISTORY_SIZE, userPtr);
|
||||
numWritten += HISTORY_SIZE;
|
||||
pos = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// last token has only literals
|
||||
if (blockOffset == blockSize)
|
||||
break;
|
||||
|
||||
// match distance is encoded in two bytes (little endian)
|
||||
unsigned int delta = getByte (userPtr);
|
||||
delta |= (unsigned int)getByte (userPtr) << 8;
|
||||
// zero isn't allowed
|
||||
if (delta == 0) {
|
||||
unlz4error ("invalid offset");
|
||||
return -1;
|
||||
}
|
||||
blockOffset += 2;
|
||||
|
||||
// match length (always >= 4, therefore length is stored minus 4)
|
||||
unsigned int matchLength = 4 + (token & 0x0F);
|
||||
if (matchLength == 4 + 0x0F) {
|
||||
unsigned char current;
|
||||
do // match length encoded in more than 1 byte
|
||||
{
|
||||
current = getByte (userPtr);
|
||||
matchLength += current;
|
||||
blockOffset++;
|
||||
} while (current == 255);
|
||||
}
|
||||
|
||||
// copy match
|
||||
unsigned int referencePos = (pos >= delta)? (pos - delta): (HISTORY_SIZE + pos - delta);
|
||||
// start and end within the current 64k block ?
|
||||
if (pos + matchLength < HISTORY_SIZE && referencePos + matchLength < HISTORY_SIZE) {
|
||||
// read/write continuous block (no wrap-around at the end of history[])
|
||||
// fast copy
|
||||
if (pos >= referencePos + matchLength || referencePos >= pos + matchLength) {
|
||||
// non-overlapping
|
||||
memcpy (history + pos, history + referencePos, matchLength);
|
||||
pos += matchLength;
|
||||
} else {
|
||||
// overlapping, slower byte-wise copy
|
||||
while (matchLength-- > 0)
|
||||
history[pos++] = history[referencePos++];
|
||||
}
|
||||
} else {
|
||||
// either read or write wraps around at the end of history[]
|
||||
while (matchLength-- > 0) {
|
||||
// copy single byte
|
||||
history[pos++] = history[referencePos++];
|
||||
|
||||
// cannot write anymore ? => wrap around
|
||||
if (pos == HISTORY_SIZE) {
|
||||
// flush output buffer
|
||||
sendBytes (history, HISTORY_SIZE, userPtr);
|
||||
numWritten += HISTORY_SIZE;
|
||||
pos = 0;
|
||||
}
|
||||
// wrap-around of read location
|
||||
referencePos %= HISTORY_SIZE;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// flush output buffer
|
||||
if (pos > 0) {
|
||||
sendBytes (history, pos, userPtr);
|
||||
numWritten += pos;
|
||||
pos = 0;
|
||||
}
|
||||
|
||||
// If we flushed everything, it's normal that position = 0
|
||||
// and nothing gets copied back into history.
|
||||
if (position != NULL && hist != NULL) {
|
||||
*position = pos;
|
||||
memcpy (hist, history, pos);
|
||||
}
|
||||
return numWritten;
|
||||
}
|
||||
|
||||
/// old interface where getByte and sendBytes use global file handles
|
||||
void unlz4(GET_BYTE getByte, SEND_BYTES sendBytes, const char* dictionary)
|
||||
{
|
||||
unlz4_userPtr (getByte, sendBytes, dictionary, NULL);
|
||||
}
|
||||
|
||||
|
||||
// ==================== COMMAND-LINE HANDLING ====================
|
||||
|
||||
|
||||
/// parse command-line
|
||||
int main(int argc, const char* argv[])
|
||||
{
|
||||
// default input/output streams
|
||||
struct UserPtr user = {
|
||||
.in = stdin,
|
||||
.out = stdout,
|
||||
.pos = 0, // initial input buffer is empty
|
||||
.available = 0
|
||||
};
|
||||
|
||||
const char *dictionary = NULL;
|
||||
|
||||
// first command-line parameter is our input filename / but ignore "-" which stands for STDIN
|
||||
int parameter;
|
||||
for (parameter = 1; parameter < argc; parameter++) {
|
||||
const char *current = argv[parameter];
|
||||
// dictionary
|
||||
if (current[0] == '-' && current[1] == 'D') {
|
||||
if (parameter + 1 >= argc)
|
||||
unlz4error ("no dictionary filename found");
|
||||
dictionary = argv[++parameter];
|
||||
continue;
|
||||
}
|
||||
|
||||
// filename
|
||||
// read from STDIN, default behavior
|
||||
if (current[0] != '-' && current[1] != '\0') {
|
||||
// already have a filename - at most one filename is allowed (except for dictionary) ?
|
||||
if (user.in != stdin)
|
||||
unlz4error ("can only decompress one file at a time");
|
||||
// get handle
|
||||
user.in = fopen (argv[1], "rb");
|
||||
if (!user.in)
|
||||
unlz4error ("file not found");
|
||||
}
|
||||
}
|
||||
|
||||
// and go !
|
||||
unlz4_userPtr (getByteFromIn, sendBytesToOut, dictionary, &user);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
// Guarded so that including this header more than once does not repeat the typedefs.
#ifndef SMALLZ4CAT_H
#define SMALLZ4CAT_H

// callback returning the next input byte
typedef unsigned char (*GET_BYTE) (void* userPtr);
// callback receiving a run of decoded bytes
typedef void (*SEND_BYTES)(const unsigned char*, unsigned int, void* userPtr);
// decompress one lz4 block of blockSize compressed bytes; position and hist may both be NULL
int unlz4Block_userPtr (GET_BYTE getByte, SEND_BYTES sendBytes, void *userPtr, unsigned int blockSize, unsigned int *position, unsigned char *hist);

#endif
|
Loading…
Reference in New Issue