First version of a support utility to provide generalized compression in

LLVM that handles availability and unavailability of bzip2 and zlib.

llvm-svn: 16648
This commit is contained in:
Reid Spencer 2004-10-04 10:49:41 +00:00
parent d1ab378be5
commit c89503753c
2 changed files with 625 additions and 0 deletions

View File

@ -0,0 +1,99 @@
//===- llvm/Support/Compressor.h --------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Reid Spencer and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the llvm::Compressor class.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_COMPRESSOR_H
#define LLVM_SUPPORT_COMPRESSOR_H
#include <llvm/Support/DataTypes.h>
namespace llvm {
/// This class provides an abstraction for compressing a block of memory using
/// a standard compression utility such as bzip2 or libz. This interface
/// allos us to abstraction the notion of compression and deal with alternate
/// compression scheme availability depending on the configured platform. This
/// facility will always favor a bzip2 implementation if its available.
/// Otherwise, libz will be used if its available. If neither zlib nor bzip2
/// are available, a very simple algorithm provided by the Compressor class
/// will be used The type of compression used can be determined by inspecting
/// the first byte of the compressed output. ASCII values '0', '1', and '2',
/// denote the compression type as given in the Algorithm enumeration below.
/// The Compressor is intended for use with memory mapped files where the
/// entire data block to be compressed or decompressed is available in
/// memory. Output, however, can be gathered in repeated calls to a callback.
/// @since 1.4
/// @brief An abstraction for memory to memory data compression
class Compressor {
/// @name Types
/// @{
public:
enum Algorithm {
COMP_TYPE_SIMPLE = '0', ///< Use simple but ubiquitous algorithm
COMP_TYPE_ZLIB = '1', ///< Use zlib algorithm, if available
COMP_TYPE_BZIP2 = '2', ///< Use bzip2 algorithm (preferred)
};
/// A callback function type used by the Compressor to get the next chunk
/// of data to which (de)compressed output will be written. This function
/// must be written by the caller to provide the buffering of the output
/// data.
/// @returns 0 for success, 1 for failure
/// @throws nothing
/// @brief Output callback function type
typedef unsigned (OutputDataCallback)(char*& buffer, unsigned& size);
/// @}
/// @name Methods
/// @{
public:
/// This function does the compression work. The block of memory starting
/// at \p in and extending for \p size bytes is compressed. The compressed
/// output is written to memory blocks returned by the \p cb callback. The
/// caller must provide an implementation of the OutputDataCallback
/// function type and provide its address as \p cb. Note that the callback
/// function will be called as many times as necessary to complete the
/// compression of the \p in block but that the total size will generally
/// be less than \p size. It is a good idea to provide as large a value to
/// the callback's \p size parameter as possible so that fewer calls to
/// the callback are made. The \p hint parameter tells the function which
/// kind of compression to start with. However, if its not available on
/// the platform, the algorithm "falls back" from bzip2 -> zlib -> simple.
/// @throws std::string if an error occurs
/// @returns the total size of the compressed data
/// @brief Compress a block of memory.
static uint64_t compress(char* in, unsigned size, OutputDataCallback* cb,
Algorithm hint = COMP_TYPE_BZIP2);
/// This function does the decompression work. The block of memory
/// starting at \p in and extending for \p size bytes is decompressed. The
/// decompressed output is written to memory blocks returned by the \p cb
/// callback. The caller must provide an implementation of the
/// OutputDataCallback function type and provide its address as \p cb.
/// Note that the callback function will be called as many times as
/// necessary to complete the compression of the \p in block but that the
/// total size will generally be greater than \p size. It is a good idea
/// to provide as large a value to the callback's \p size parameter as
/// possible so that fewer calls to the callback are made.
/// @throws std::string if an error occurs
/// @returns the total size of the decompressed data
/// @brief Decompress a block of memory.
static uint64_t decompress(char *in, unsigned size,
OutputDataCallback* cb);
/// @}
};
}
// vim: sw=2 ai
#endif

View File

@ -0,0 +1,526 @@
//===- lib/Support/Compressor.cpp -------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Reid Spencer and is distributed under the
// University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the llvm::Compressor class, an abstraction for memory
// block compression.
//
//===----------------------------------------------------------------------===//
#include "llvm/Config/config.h"
#include "llvm/Support/Compressor.h"
#include "llvm/ADT/StringExtras.h"
#include <cassert>
#include <string>
#ifdef HAVE_BZIP2
#include <bzlib.h>
#endif
#ifdef HAVE_ZLIB
#include <zlib.h>
#endif
#ifndef HAVE_BZIP2
# ifndef HAVE_ZLIB
# warning No compression library is available!!
# endif
#endif
namespace {
inline int getdata(char*& buffer, unsigned& size,
llvm::Compressor::OutputDataCallback* cb) {
buffer = 0;
size = 0;
int result = (*cb)(buffer, size);
assert(buffer != 0 && "Invalid result from Compressor callback");
assert(size != 0 && "Invalid result from Compressor callback");
return result;
}
//===----------------------------------------------------------------------===//
//=== RLCOMP - very simple run length compression scheme
//=== The code below transforms the input into blocks that are either
//=== compressed or not. Each block starts with a header byte that provides
//=== the length of the block. Values < 128 are uncompressed, values >128
//=== are compressed. The value 128 is illegal. Currently, the algorithm is
//=== not completed and is #if'd out.
//===----------------------------------------------------------------------===//
enum {
MAX_RLCOMP_OUT = 32768
};
struct RLCOMP_stream {
// User provided fields
char* next_in;
unsigned avail_in;
char* next_out;
unsigned avail_out;
// Information fields
uint64_t output_count; // Total count of output bytes
#if 0
// Algorithm fields
unsigned block_len; // Length of current block
unsigned compression; // State of compression 0=no, 1=yes, 2=indeterminate
char buffer[128]; // compression buffer (not used for decompress)
unsigned buflen; // bytes in compression buffer
bool pending; // is data pending to be written
char pending_data; // the pending data
unsigned clen; // length of the compressed block
#endif
};
void RLCOMP_init(RLCOMP_stream* s) {
s->output_count = 0;
#if 0
s->block_len = 0;
s->compression = 2;
s->buflen = 0;
s->pending = false;
s->pending_data = 0;
s->clen = 0;
#endif
}
inline bool RLCOMP_getchar(RLCOMP_stream* s, unsigned& data) {
#if 0
if (s->avail_in) {
data = *s->next_in++;
s->avail_in--;
return true;
}
#endif
return false;
}
inline bool RLCOMP_putchar(RLCOMP_stream* s, unsigned data) {
#if 0
if (s->avail_out) {
*s->next_out++ = data;
s->avail_out--;
s->output_count++;
return true;
} else {
s->pending = true;
s->pending_data = data;
return false;
}
#else
return false;
#endif
}
bool RLCOMP_compress(RLCOMP_stream* s) {
assert(s && "Invalid RLCOMP_stream");
assert(s->next_in != 0);
assert(s->next_out != 0);
assert(s->avail_in >= 1);
assert(s->avail_out >= 1);
#if 0
// Handle pending data from the last time in
if (s->pending) {
RLCOMP_putchar(s,s->pending_data);
s->pending = false;
}
unsigned c = 0;
unsigned lastc = 0;
// Handle the degenerate len=1 case
if (!RLCOMP_getchar(s,lastc)) {
RLCOMP_putchar(s,1);
return RLCOMP_putchar(s,lastc);
}
while (RLCOMP_getchar(s,c)) {
switch(s->compression) {
case 0:
if (lastc == c) {
s->compression = 1;
s->clen = 2 ;
} else {
if (!RLCOMP_putchar(s, c))
return false;
}
break;
case 1:
if (lastc != c) {
s->compression = 2;
if (!RLCOMP_putchar(s, s->clen))
return false;
} else {
s->clen++;
}
break;
case 2:
break;
}
lastc = c;
}
#endif
if (s->avail_out >= s->avail_in) {
::memcpy(s->next_out, s->next_in, s->avail_in);
s->output_count += s->avail_in;
s->avail_out -= s->avail_in;
s->next_in += s->avail_in;
s->avail_in = 0;
return true;
} else {
::memcpy(s->next_out, s->next_in, s->avail_out);
s->output_count += s->avail_out;
s->avail_in -= s->avail_out;
s->next_in += s->avail_out;
s->avail_out = 0;
return false;
}
}
bool RLCOMP_decompress(RLCOMP_stream* s) {
assert(s && "Invalid RLCOMP_stream");
assert(s->next_in != 0);
assert(s->next_out != 0);
assert(s->avail_in >= 1);
assert(s->avail_out >= 1);
#if 0
unsigned c = 0;
while (RLCOMP_getchar(s,c)) {
switch(s->compression) {
case 0: // This is not a compressed block
s->block_len--;
if (!RLCOMP_putchar(s,c))
return false;
break;
case 1: // This is a comperssed block
while (s->block_len-- > 0)
if (!RLCOMP_putchar(s,c))
return false;
break;
case 2: // This is the length field
if (c < 128) {
s->compression = 0;
s->block_len = c;
} else {
s->compression = 1;
s->block_len = c - 128;
}
continue;
default: // oops!
throw std::string("Invalid compression state");
}
if (s->block_len <= 0)
s->compression = 2;
}
if (s->repeat > 0)
throw std::string("Invalid compression state");
#endif
if (s->avail_out >= s->avail_in) {
::memcpy(s->next_out, s->next_in, s->avail_in);
s->output_count += s->avail_in;
s->avail_out -= s->avail_in;
s->next_in += s->avail_in;
s->avail_in = 0;
return true;
} else {
::memcpy(s->next_out, s->next_in, s->avail_out);
s->output_count += s->avail_out;
s->avail_in -= s->avail_out;
s->next_in += s->avail_out;
s->avail_out = 0;
return false;
}
}
void RLCOMP_end(RLCOMP_stream* strm) {
}
}
namespace llvm {
// Compress in one of three ways
uint64_t Compressor::compress(char* in, unsigned size,
OutputDataCallback* cb, Algorithm hint) {
assert(in && "Can't compress null buffer");
assert(size && "Can't compress empty buffer");
assert(cb && "Can't compress without a callback function");
uint64_t result = 0;
switch (hint) {
case COMP_TYPE_BZIP2: {
#if defined(HAVE_BZIP2)
// Set up the bz_stream
bz_stream bzdata;
bzdata.bzalloc = 0;
bzdata.bzfree = 0;
bzdata.opaque = 0;
bzdata.next_in = in;
bzdata.avail_in = size;
bzdata.next_out = 0;
bzdata.avail_out = 0;
switch ( BZ2_bzCompressInit(&bzdata, 9, 0, 0) ) {
case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
case BZ_MEM_ERROR: throw std::string("Out of memory");
case BZ_OK:
default:
break;
}
// Get a block of memory
if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb)) {
BZ2_bzCompressEnd(&bzdata);
throw std::string("Can't allocate output buffer");
}
// Put compression code in first byte
(*bzdata.next_out++) = COMP_TYPE_BZIP2;
bzdata.avail_out--;
// Compress it
int bzerr = BZ_FINISH_OK;
while (BZ_FINISH_OK == (bzerr = BZ2_bzCompress(&bzdata, BZ_FINISH))) {
if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb)) {
BZ2_bzCompressEnd(&bzdata);
throw std::string("Can't allocate output buffer");
}
}
switch (bzerr) {
case BZ_SEQUENCE_ERROR:
case BZ_PARAM_ERROR: throw std::string("Param/Sequence error");
case BZ_FINISH_OK:
case BZ_STREAM_END: break;
default: throw std::string("Oops: ") + utostr(unsigned(bzerr));
}
// Finish
result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
bzdata.total_out_lo32 + 1;
BZ2_bzCompressEnd(&bzdata);
break;
#else
// FALL THROUGH
#endif
}
case COMP_TYPE_ZLIB: {
#if defined(HAVE_ZLIB)
z_stream zdata;
zdata.zalloc = Z_NULL;
zdata.zfree = Z_NULL;
zdata.opaque = Z_NULL;
zdata.next_in = reinterpret_cast<Bytef*>(in);
zdata.avail_in = size;
if (Z_OK != deflateInit(&zdata,Z_BEST_COMPRESSION))
throw std::string(zdata.msg ? zdata.msg : "zlib error");
if (0 != getdata((char*&)(zdata.next_out), zdata.avail_out,cb)) {
deflateEnd(&zdata);
throw std::string("Can't allocate output buffer");
}
(*zdata.next_out++) = COMP_TYPE_ZLIB;
zdata.avail_out--;
int flush = 0;
while ( Z_OK == deflate(&zdata,0) && zdata.avail_out == 0) {
if (0 != getdata((char*&)zdata.next_out, zdata.avail_out, cb)) {
deflateEnd(&zdata);
throw std::string("Can't allocate output buffer");
}
}
while ( Z_STREAM_END != deflate(&zdata, Z_FINISH)) {
if (0 != getdata((char*&)zdata.next_out, zdata.avail_out, cb)) {
deflateEnd(&zdata);
throw std::string("Can't allocate output buffer");
}
}
result = static_cast<uint64_t>(zdata.total_out) + 1;
deflateEnd(&zdata);
break;
#else
// FALL THROUGH
#endif
}
case COMP_TYPE_SIMPLE: {
RLCOMP_stream sdata;
sdata.next_in = in;
sdata.avail_in = size;
RLCOMP_init(&sdata);
if (0 != getdata(sdata.next_out, sdata.avail_out,cb)) {
throw std::string("Can't allocate output buffer");
}
*(sdata.next_out++) = COMP_TYPE_SIMPLE;
sdata.avail_out--;
while (!RLCOMP_compress(&sdata)) {
if (0 != getdata(sdata.next_out, sdata.avail_out,cb)) {
throw std::string("Can't allocate output buffer");
}
}
result = sdata.output_count + 1;
RLCOMP_end(&sdata);
break;
}
default:
throw std::string("Invalid compression type hint");
}
return result;
}
// Decompress in one of three ways
uint64_t Compressor::decompress(char *in, unsigned size,
OutputDataCallback* cb) {
assert(in && "Can't decompress null buffer");
assert(size > 1 && "Can't decompress empty buffer");
assert(cb && "Can't decompress without a callback function");
uint64_t result = 0;
switch (*in++) {
case COMP_TYPE_BZIP2: {
#if !defined(HAVE_BZIP2)
throw std::string("Can't decompress BZIP2 data");
#else
// Set up the bz_stream
bz_stream bzdata;
bzdata.bzalloc = 0;
bzdata.bzfree = 0;
bzdata.opaque = 0;
bzdata.next_in = in;
bzdata.avail_in = size - 1;
bzdata.next_out = 0;
bzdata.avail_out = 0;
switch ( BZ2_bzDecompressInit(&bzdata, 0, 0) ) {
case BZ_CONFIG_ERROR: throw std::string("bzip2 library mis-compiled");
case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
case BZ_MEM_ERROR: throw std::string("Out of memory");
case BZ_OK:
default:
break;
}
// Get a block of memory
if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb)) {
BZ2_bzDecompressEnd(&bzdata);
throw std::string("Can't allocate output buffer");
}
// Decompress it
int bzerr = BZ_OK;
while (BZ_OK == (bzerr = BZ2_bzDecompress(&bzdata))) {
if (0 != getdata(bzdata.next_out, bzdata.avail_out,cb)) {
BZ2_bzDecompressEnd(&bzdata);
throw std::string("Can't allocate output buffer");
}
}
switch (bzerr) {
case BZ_PARAM_ERROR: throw std::string("Compressor internal error");
case BZ_MEM_ERROR: throw std::string("Out of memory");
case BZ_DATA_ERROR: throw std::string("Data integrity error");
case BZ_DATA_ERROR_MAGIC:throw std::string("Data is not BZIP2");
default: throw("Ooops");
case BZ_STREAM_END:
break;
}
// Finish
result = (static_cast<uint64_t>(bzdata.total_out_hi32) << 32) |
bzdata.total_out_lo32;
BZ2_bzDecompressEnd(&bzdata);
break;
}
#endif
case COMP_TYPE_ZLIB: {
#if !defined(HAVE_ZLIB)
throw std::string("Can't decompress ZLIB data");
#else
z_stream zdata;
zdata.zalloc = Z_NULL;
zdata.zfree = Z_NULL;
zdata.opaque = Z_NULL;
zdata.next_in = reinterpret_cast<Bytef*>(in);
zdata.avail_in = size - 1;
if ( Z_OK != inflateInit(&zdata))
throw std::string(zdata.msg ? zdata.msg : "zlib error");
if (0 != getdata((char*&)zdata.next_out, zdata.avail_out,cb)) {
inflateEnd(&zdata);
throw std::string("Can't allocate output buffer");
}
int zerr = Z_OK;
while (Z_OK == (zerr = inflate(&zdata,0))) {
if (0 != getdata((char*&)zdata.next_out, zdata.avail_out,cb)) {
inflateEnd(&zdata);
throw std::string("Can't allocate output buffer");
}
}
if (zerr != Z_STREAM_END)
throw std::string(zdata.msg?zdata.msg:"zlib error");
result = static_cast<uint64_t>(zdata.total_out);
inflateEnd(&zdata);
break;
#endif
}
case COMP_TYPE_SIMPLE: {
RLCOMP_stream sdata;
sdata.next_in = in;
sdata.avail_in = size - 1;
RLCOMP_init(&sdata);
if (0 != getdata(sdata.next_out, sdata.avail_out,cb)) {
throw std::string("Can't allocate output buffer");
}
while (!RLCOMP_decompress(&sdata)) {
if (0 != getdata(sdata.next_out, sdata.avail_out,cb)) {
throw std::string("Can't allocate output buffer");
}
}
result = sdata.output_count;
RLCOMP_end(&sdata);
break;
}
default:
throw std::string("Unknown type of compressed data");
}
return result;
}
}
// vim: sw=2 ai