
1461 lines
48 KiB

size_t GetSizeOfHeadData(bool heapPageData)
if (heapPageData) {
return SizeOfHeapPageHeaderData;
} else {
return SizeOfPageHeaderData;
/*
 * HeapPageCalcRealRowCnt -- count the item ids on a heap page for which
 * ItemIdIsNormal() is true.
 *
 * Some item ids may not be valid, so the result can be smaller than the
 * number of item-id slots between the page header and pd_lower.
 *
 * buf: start of the page (read as HeapPageHeaderData).
 * Returns the number of normal item ids.
 */
uint16 HeapPageCalcRealRowCnt (char *buf) {
    HeapPageHeaderData *page = (HeapPageHeaderData *)buf;
    uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData);
    uint16 cnt = 0;
    uint16 i;

    for (i = 0; i < row_cnt; i++) {
        if (ItemIdIsNormal(GET_ITEMID_BY_IDX(buf, i))) {
            cnt++;
        }
    }
    return cnt;
}
/*
 * DecompressDeconvertRows -- undo the byte-wise reshaping of the tuple area.
 *
 * The bytes in [pd_upper, pd_special) are consumed as a stream that holds
 * byte 0 of every row, then byte 1 of every row, and so on up to
 * max_row_len; rows shorter than the current byte index are skipped.  Each
 * byte is written to its row position in aux_buf, and the rebuilt tuple
 * area is finally copied back over the page.
 *
 * buf:          page, modified in place.
 * aux_buf:      scratch buffer of BLCKSZ bytes (cleared here).
 * real_order:   item-id indexes ordered by ascending lp_off; the first
 *               real_row_cnt entries are read.
 * max_row_len:  length of the longest row.
 * real_row_cnt: number of rows to rebuild.
 */
void DecompressDeconvertRows(char *buf, char *aux_buf, int16 *real_order, uint16 max_row_len, uint16 real_row_cnt) {
    errno_t ret;
    HeapPageHeaderData *page = (HeapPageHeaderData *)buf;
    uint16 row_cnt = real_row_cnt;
    uint32 total_size = page->pd_special - page->pd_upper;
    char *copy_begin = buf + page->pd_upper;
    char *row;
    uint16 i, j, k, cur, up, row_size;

    ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ);
    securec_check(ret, "", "");

    k = 0;
    for (i = 0; i < max_row_len; i++) {
        for (j = 0; j < row_cnt; j++) {
            /* a row ends where the next row (by offset) begins; the last one ends at pd_special */
            up = (j == (row_cnt - 1)) ? page->pd_special : GET_ITEMID_BY_IDX(buf, (real_order[j + 1]))->lp_off;
            cur = GET_ITEMID_BY_IDX(buf, (real_order[j]))->lp_off;
            row_size = up - cur;
            row = aux_buf + cur;
            if (i < row_size) {
                row[i] = copy_begin[k++]; // this part is reshaped
            }
        }
    }

    if (k != total_size) {
        printf("ERROR!!! pg_deconvert_rows error...!!!\n");
        /* the stream did not cover the tuple area exactly: do not copy a mis-sized result back */
        return;
    }

    // cp aux_buf to page_buf
    ret = memcpy_sp(copy_begin, total_size, aux_buf + page->pd_upper, total_size);
    securec_check(ret, "", "");
    return ;
}
/*
 * CompressConvertItemRealOrder -- list the indexes of all normal item ids
 * in ascending lp_off (tuple offset) order.
 *
 * Item ids are not necessarily stored in offset order, and some may not be
 * normal; those are ignored.  The sort is an insertion into a singly linked
 * list kept in the second part of real_order (starting at
 * real_order + real_row_cnt, indexed by item-id index); the list is then
 * flattened into real_order[0 .. real_row_cnt).
 *
 * buf:          page to inspect.
 * real_order:   output array; must have room for real_row_cnt entries plus
 *               one link slot per item id on the page.
 * real_row_cnt: number of normal item ids on the page.
 */
void CompressConvertItemRealOrder(char *buf, int16 *real_order, uint16 real_row_cnt) {
    HeapPageHeaderData *page = (HeapPageHeaderData *)buf;
    uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData);
    ItemIdData *begin = (ItemIdData *)(buf + GetPageHeaderSize(page));
    int16 *link_order = real_order + real_row_cnt;
    int16 i, head, curr, prev;
    int16 end = -1; // invalid index

    head = end;
    // item ids are very likely stored in descending offset order; skip the ones that are not normal
    for (i = 0; i < row_cnt; i++) {
        if (!ItemIdIsNormal(begin + i)) {
            continue;
        }

        if (head == end) { // set the head idx, insert the first
            link_order[i] = end;
            head = i;
            continue;
        }

        if ((begin + i)->lp_off < (begin + head)->lp_off) {
            link_order[i] = head; // update the head idx
            head = i;
            continue;
        }

        /* walk the list to the first entry whose offset is not smaller */
        prev = head;
        curr = link_order[head];
        while ((curr != end) && ((begin + i)->lp_off > (begin + curr)->lp_off)) {
            prev = curr;
            curr = link_order[curr];
        }

        link_order[prev] = i;
        link_order[i] = curr;
    }

    // arrange the link to array
    curr = head;
    for (i = 0; i < real_row_cnt; i++) {
        real_order[i] = curr;
        curr = link_order[curr];
    }

    /* after real_row_cnt steps the list must be exhausted */
    if (curr != end) {
        printf("ERROR!!! pre_convert_real_order error...!!!\n");
    }
}
int DecompressPage(const char* src, char* dst, uint8 algorithm)
if (PageIs8BXidHeapVersion(src)) {
return TemplateDecompressPage<true>(src, dst, algorithm);
} else {
return TemplateDecompressPage<false>(src, dst, algorithm);
/*
 * cprs_diff_deconvert_rows -- undo delta encoding over a column-major byte
 * matrix stored at buf + offset.
 *
 * The region is read as min_row_len groups of real_row_cnt bytes each.
 * Inside every group each byte (from the second on) is replaced by the
 * running sum with its predecessor, which reverses a "subtract the previous
 * byte" pass (uint8 arithmetic, so sums wrap modulo 256).
 *
 * buf:          buffer holding the data, modified in place.
 * offset:       byte offset of the region inside buf.
 * min_row_len:  number of groups.
 * real_row_cnt: number of bytes per group.
 */
void cprs_diff_deconvert_rows(char *buf, uint32 offset, uint16 min_row_len, uint16 real_row_cnt) {
    uint16 row_cnt = real_row_cnt;
    uint32 common_size = min_row_len;
    uint8 *copy_begin = (uint8 *)(buf + offset);
    uint16 i, j;

    for (i = 0; i < common_size; i++) {
        for (j = 1; j < row_cnt; j++) {
            copy_begin[i * row_cnt + j] += copy_begin[i * row_cnt + (j - 1)];
        }
    }
    return ;
}
/*
 * CompressConvertCheck -- decide whether a page qualifies for row
 * conversion, i.e. whether all row sizes differ by at most MIN_DIFF_SIZE
 * bytes, and collect the data the conversion needs.
 *
 * buf:          page to inspect.
 * real_order:   out; receives a malloc'ed array whose first part holds the
 *               item-id indexes in ascending tuple-offset order (the second
 *               part is link scratch space).  It may be set even when false
 *               is returned, so the caller must free() it whenever it is
 *               not NULL.
 * max_row_len:  out; largest row size (only set on success).
 * min_row_len:  out; smallest row size (only set on success).
 * real_row_cnt: out; number of normal item ids (only set on success).
 *
 * Returns true if the page can be converted, false otherwise (bad
 * pd_lower, fewer than MIN_CONVERT_CNT normal rows, allocation failure, a
 * row shorter than MIN_CONVERT_CNT * 2, or row sizes too far apart).
 */
bool CompressConvertCheck(char *buf, int16 **real_order, uint16 *max_row_len, uint16 *min_row_len, uint16 *real_row_cnt) {
    HeapPageHeaderData *page = (HeapPageHeaderData *)buf;
    uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData);
    int16 i, row_size;
    ItemIdData *ptr = NULL;
    uint16 up = page->pd_special;
    uint16 min_size = GS_INVALID_ID16;
    uint16 max_size = 0;
    errno_t ret;

    if (page->pd_lower < GetPageHeaderSize(page) || (page->pd_lower > page->pd_upper)) {
        return false;
    }

    uint16 normal_row_cnt = HeapPageCalcRealRowCnt(buf);
    if (normal_row_cnt < MIN_CONVERT_CNT) { // no need convert
        return false;
    }

    // to store the real tuple order.
    // the first part is real array order, and the second part is link.
    *real_order = (int16 *)malloc(sizeof(uint16) * row_cnt * 2);
    if (*real_order == NULL) {
        printf("zfunc compress file");
        return false;
    }
    ret = memset_sp(*real_order, sizeof(uint16) * row_cnt * 2, 0, sizeof(uint16) * row_cnt * 2);
    securec_check(ret, "", "");

    // order the ItemIds by tuple_offset order.
    CompressConvertItemRealOrder(buf, *real_order, normal_row_cnt);

    // walk the rows from the highest offset down; each row ends where the previous one started
    for (i = normal_row_cnt - 1; i >= 0; i--) {
        ptr = GET_ITEMID_BY_IDX(buf, ((*real_order)[i]));

        row_size = up - ptr->lp_off;
        if (row_size < MIN_CONVERT_CNT * 2) {
            return false;
        }

        min_size = (row_size < min_size) ? row_size : min_size;
        max_size = (row_size > max_size) ? row_size : max_size;

        if ((max_size - min_size) > MIN_DIFF_SIZE) { // no need convert
            return false;
        }
        up = ptr->lp_off;
    }

    // report the row size bounds and the row count
    *max_row_len = max_size;
    *min_row_len = min_size;
    *real_row_cnt = normal_row_cnt;
    return true;
}
/*
 * DecompressDeconvertItemIds -- undo the byte-wise transposition of the
 * item-id array.
 *
 * The item-id area (from the end of the page header up to pd_lower) is read
 * as a stream that holds byte 0 of every item id, then byte 1 of every item
 * id, and so on.  The bytes are put back so that each ItemIdData is
 * contiguous again, and the result is copied back over the page.
 *
 * buf:     page, modified in place.
 * aux_buf: scratch buffer of BLCKSZ bytes (cleared here).
 */
void DecompressDeconvertItemIds(char *buf, char *aux_buf) {
    errno_t ret;
    HeapPageHeaderData *page = (HeapPageHeaderData *)buf;
    uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData);
    uint32 total_size = row_cnt * sizeof(ItemIdData);
    char *copy_begin = buf + GetPageHeaderSize(page);
    uint16 i, j, k;

    // clear aux_buf
    ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ);
    securec_check(ret, "", "");

    k = 0;
    for (i = 0; i < sizeof(ItemIdData); i++) {
        for (j = 0; j < row_cnt; j++) {
            aux_buf[j * sizeof(ItemIdData) + i] = copy_begin[k++];
        }
    }

    // cp aux_buf to page_buf
    ret = memcpy_sp(copy_begin, total_size, aux_buf, total_size);
    securec_check(ret, "", "");
    return ;
}
/*
 * DecompressDeconvertOnePage -- reverse the byte/diff conversion applied to
 * a page before it was compressed.
 *
 * Steps, in order:
 *   1. if diff_convert, undo the delta pass over the item-id area;
 *   2. restore the item-id array layout;
 *   3. re-run CompressConvertCheck on the restored item ids to obtain the
 *      row order and row size bounds;
 *   4. if diff_convert, undo the delta pass over the tuple area;
 *   5. restore the tuple area layout.
 *
 * buf:          page, modified in place.
 * aux_buf:      scratch buffer of BLCKSZ bytes.
 * diff_convert: whether the delta pass was applied.
 */
void DecompressDeconvertOnePage(char *buf, char *aux_buf, bool diff_convert) {
    uint16 max_row_len = 0;
    uint16 min_row_len = 0;
    int16 *real_order = NULL; // itemids are not in order sometimes. we must find the real
    uint16 real_row_cnt = 0;

    if (diff_convert) {
        cprs_diff_deconvert_rows(buf, GetPageHeaderSize(buf), sizeof(ItemIdData),
            (((HeapPageHeaderData *)buf)->pd_lower - GetPageHeaderSize(buf)) / sizeof(ItemIdData));
    }

    // =======firstly, arrange the itemids.
    DecompressDeconvertItemIds(buf, aux_buf);

    if (!CompressConvertCheck(buf, &real_order, &max_row_len, &min_row_len, &real_row_cnt)) {
        /* the check may have allocated the order array before failing */
        if (real_order != NULL) {
            free(real_order);
        }
        return ;
    }

    // =======and last, the tuples
    if (diff_convert) {
        cprs_diff_deconvert_rows(buf, ((HeapPageHeaderData *)buf)->pd_upper, min_row_len, real_row_cnt);
    }
    DecompressDeconvertRows(buf, aux_buf, real_order, max_row_len, real_row_cnt);

    if (real_order != NULL) {
        free(real_order);
    }
    return ;
}
void DecompressPageDeconvert(char *src, bool diff_convert)
char *aux_buf = NULL;
errno_t rc;
aux_buf = (char *)malloc(BLCKSZ);
if (aux_buf == NULL) {
// add log
rc = memset_s(aux_buf, BLCKSZ, 0, BLCKSZ);
securec_check(rc, "", "");
// do convert
DecompressDeconvertOnePage(src, aux_buf, diff_convert);
if (aux_buf != NULL) {
/*
 * DecompressPage() -- Decompress one compressed page.
 * Returns the size of the decompressed page, which should be BLCKSZ, or
 * -1 for a decompression error
 * -2 for an unrecognized compression algorithm
 * Note: the size of dst must be greater than or equal to BLCKSZ.
 */
// TemplateDecompressPage -- decompress one page from src into dst.
// heapPageData selects which compressed-page struct (HeapPageCompressData or
// PageCompressData) src is read as, and which header size is used.
// The page header is copied verbatim; the payload is decompressed right after it.
// NOTE(review): the function braces, the case labels of the switch, and its
// break/default lines are not visible in this view; the comments below describe
// only the statements that are.
template<bool heapPageData>
int TemplateDecompressPage(const char* src, char* dst, uint8 algorithm)
int decompressed_size;
char* data;
uint32 size;
bool byte_convert, diff_convert;
// the header is stored uncompressed: copy it straight to the output
size_t sizeOfPageHeaderData = GetSizeOfHeadData(heapPageData);
int rc = memcpy_s(dst, sizeOfPageHeaderData, src, sizeOfPageHeaderData);
securec_check(rc, "", "");
// pick payload pointer, payload size and conversion flags from the matching struct
if (heapPageData) {
data = ((HeapPageCompressData*) src)->data;
size = ((HeapPageCompressData*) src)->size;
byte_convert = ((HeapPageCompressData*) src)->byte_convert;
diff_convert = ((HeapPageCompressData*) src)->diff_convert;
} else {
data = ((PageCompressData*) src)->data;
size = ((PageCompressData*) src)->size;
byte_convert = ((PageCompressData*) src)->byte_convert;
diff_convert = ((PageCompressData*) src)->diff_convert;
switch (algorithm) {
// LZ payload: output capacity is the page minus the header; completeness is not checked (false)
// NOTE(review): no check of the lz_decompress result is visible here, while lz_decompress
// can return -1 — confirm the error is handled.
decompressed_size = lz_decompress(
data, size, dst + sizeOfPageHeaderData, BLCKSZ - sizeOfPageHeaderData, false);
// ZSTD payload: same destination and capacity
decompressed_size =
ZSTD_decompress(dst + sizeOfPageHeaderData, BLCKSZ - sizeOfPageHeaderData, data, size);
if (ZSTD_isError(decompressed_size)) {
return -1;
// pages that were byte-converted before compression are converted back in place
if (byte_convert) {
// deconvert dst
DecompressPageDeconvert(dst, diff_convert);
// total bytes produced: header plus decompressed payload
return sizeOfPageHeaderData + decompressed_size;
// pg_lz
/* ----------
* pg_lzcompress.c -
* This is an implementation of LZ compression for PostgreSQL.
* It uses a simple history table and generates 2-3 byte tags
* capable of backward copy information for 3-273 bytes with
* a max offset of 4095.
* Entry routines:
* bool
* pglz_compress(const char *source, int32 slen, PGLZ_Header *dest,
* const PGLZ_Strategy *strategy);
* source is the input data to be compressed.
* slen is the length of the input data.
* dest is the output area for the compressed result.
* It must be at least as big as PGLZ_MAX_OUTPUT(slen).
* strategy is a pointer to some information controlling
* the compression algorithm. If NULL, the compiled
* in default strategy is used.
* The return value is TRUE if compression succeeded,
* FALSE if not; in the latter case the contents of dest
* are undefined.
* void
* pglz_decompress(const PGLZ_Header *source, char *dest)
* source is the compressed input.
* dest is the area where the uncompressed data will be
* written to. It is the callers responsibility to
* provide enough space. The required amount can be
* obtained with the macro PGLZ_RAW_SIZE(source).
* The data is written to buff exactly as it was handed
* to pglz_compress(). No terminating zero byte is added.
* The decompression algorithm and internal data format:
* PGLZ_Header is defined as
* typedef struct PGLZ_Header {
* int32 vl_len_;
* int32 rawsize;
* }
* The header is followed by the compressed data itself.
* The data representation is easiest explained by describing
* the process of decompression.
* If VARSIZE(x) == rawsize + sizeof(PGLZ_Header), then the data
* is stored uncompressed as plain bytes. Thus, the decompressor
* simply copies rawsize bytes from the location after the
* header to the destination.
* Otherwise the first byte after the header tells what to do
* the next 8 times. We call this the control byte.
* An unset bit in the control byte means, that one uncompressed
* byte follows, which is copied from input to output.
* A set bit in the control byte means, that a tag of 2-3 bytes
* follows. A tag contains information to copy some bytes, that
* are already in the output buffer, to the current location in
* the output. Let's call the three tag bytes T1, T2 and T3. The
* position of the data to copy is coded as an offset from the
* actual output position.
* The offset is in the upper nibble of T1 and in T2.
* The length is in the lower nibble of T1.
* So the 16 bits of a 2 byte tag are coded as
* 7---T1--0 7---T2--0
* This limits the offset to 1-4095 (12 bits) and the length
* to 3-18 (4 bits) because 3 is always added to it. To emit
* a tag of 2 bytes with a length of 2 only saves one control
* bit. But we lose one byte in the possible length of a tag.
* In the actual implementation, the 2 byte tag's length is
* limited to 3-17, because the value 0xF in the length nibble
* has special meaning. It means, that the next following
* byte (T3) has to be added to the length value of 18. That
* makes total limits of 1-4095 for offset and 3-273 for length.
* Now that we have successfully decoded a tag. We simply copy
* the output that occurred <offset> bytes back to the current
* output location in the specified <length>. Thus, a
* sequence of 200 spaces (think about bpchar fields) could be
* coded in 4 bytes. One literal space and a three byte tag to
* copy 199 bytes with a -1 offset. Whow - that's a compression
* rate of 98%! Well, the implementation needs to save the
* original data size too, so we need another 4 bytes for it
* and end up with a total compression rate of 96%, what's still
* worth a Whow.
* The compression algorithm
* The following uses numbers used in the default strategy.
* The compressor works best for attributes of a size between
* 1K and 1M. For smaller items there's not that much chance of
* redundancy in the character sequence (except for large areas
* of identical bytes like trailing spaces) and for bigger ones
* our 4K maximum look-back distance is too small.
* The compressor creates a table for 8192 lists of positions.
* For each input position (except the last 3), a hash key is
* built from the 4 next input bytes and the position remembered
* in the appropriate list. Thus, the table points to linked
* lists of likely to be at least in the first 4 characters
* matching strings. This is done on the fly while the input
* is compressed into the output area. Table entries are only
* kept for the last 4096 input positions, since we cannot use
* back-pointers larger than that anyway.
* For each byte in the input, its hash key (built from this
* byte and the next 3) is used to find the appropriate list
* in the table. The lists remember the positions of all bytes
* that had the same hash key in the past in increasing backward
* offset order. Now for all entries in the used lists, the
* match length is computed by comparing the characters from the
* entries position with the characters from the actual input
* position.
* The compressor starts with a so called "good_match" of 128.
* It is a "prefer speed against compression ratio" optimizer.
* So if the first entry looked at already has 128 or more
* matching characters, the lookup stops and that position is
* used for the next tag in the output.
* For each subsequent entry in the history list, the "good_match"
* is lowered by 10%. So the compressor will be more happy with
* short matches the farther it has to go back in the history.
* Another "speed against ratio" preference characteristic of
* the algorithm.
* Thus there are 3 stop conditions for the lookup of matches:
* - a match >= good_match is found
* - there are no more history entries to look at
* - the next history entry is already too far back
* to be coded into a tag.
* Finally the match algorithm checks that at least a match
* of 3 or more bytes has been found, because that's the smallest
* amount of copy information to code into a tag. If so, a tag
* is emitted and all the input bytes covered by that are just
* scanned for the history adds; otherwise a literal character
* is emitted and only its history entry added.
* Acknowledgements:
* Many thanks to Adisak Pochanayon, whose article about SLZ
* inspired me to write the PostgreSQL compression this way.
* Jan Wieck
* Copyright (c) 1999-2012, PostgreSQL Global Development Group
* src/backend/utils/adt/pg_lzcompress.c
* ----------
*/
#include "postgres.h"
#include "knl/knl_variable.h"
#include <limits.h>
#include "utils/pg_lzcompress.h"
/* ----------
 * The provided standard strategies
 *
 * Each PGLZ_Strategy initializer has six positional values: minimum input
 * size, maximum input size, required compression rate (percent), the
 * "give up if nothing compressible in the first N bytes" limit, the
 * good-match length, and the per-iteration good-match drop (percent).
 * ----------
 */
static const PGLZ_Strategy strategy_default_data = {
    32,      /* Data chunks less than 32 bytes are not
              * compressed */
    INT_MAX, /* No upper limit on what we'll try to
              * compress */
    25,      /* Require 25% compression rate, or not worth
              * it */
    1024,    /* Give up if no compression in the first 1KB */
    128,     /* Stop history lookup if a match of 128 bytes
              * is found */
    10       /* Lower good match size by 10% at every loop
              * iteration */
};
const PGLZ_Strategy* const PGLZ_strategy_default = &strategy_default_data;

static const PGLZ_Strategy strategy_always_data = {
    0,       /* Chunks of any size are compressed */
    INT_MAX, /* No upper limit on the input size; without this value every
              * following initializer would shift by one field */
    0,       /* It's enough to save one single byte */
    INT_MAX, /* Never give up early */
    128,     /* Stop history lookup if a match of 128 bytes
              * is found */
    6        /* Look harder for a good match */
};
const PGLZ_Strategy* const PGLZ_strategy_always = &strategy_always_data;
/* ----------
 * pglz_hist_idx -
 *
 *		Computes the history table slot for the lookup by the next 4
 *		characters in the input.
 *
 * NB: because we use the next 4 characters, we are not guaranteed to
 * find 3-character matches; they very possibly will be in the wrong
 * hash list.  This seems an acceptable tradeoff for spreading out the
 * hash keys more.
 *
 * When fewer than 4 bytes remain before _e, the first byte alone is used.
 * The result is masked to the table size, which relies on
 * PGLZ_HISTORY_LISTS being a power of 2.
 * ----------
 */
#define pglz_hist_idx(_s, _e)                                                                    \
    (((((_e) - (_s)) < 4) ? (int)(_s)[0]                                                         \
                          : (((unsigned char)((_s)[0]) << 9) ^ ((unsigned char)((_s)[1]) << 6) ^ \
                             ((unsigned char)((_s)[2]) << 3) ^ (unsigned char)((_s)[3]))) &      \
     (PGLZ_HISTORY_LISTS - 1))
/* ----------
 * pglz_hist_add -
 *
 *		Adds a new entry to the history table.
 *
 * _hs is the array of list heads, _he the entry pool, _hn the index of the
 * next pool entry to use, _s the input position being recorded and _e the
 * end of the input.
 *
 * If _recycle is true, then we are recycling a previously used entry,
 * and must first delink it from its old hashcode's linked list.
 *
 * NOTE: beware of multiple evaluations of macro's arguments, and note that
 * _hn and _recycle are modified in the macro.
 * ----------
 */
#define pglz_hist_add(_hs, _he, _hn, _recycle, _s, _e)     \
    do {                                                   \
        int __hindex = pglz_hist_idx((_s), (_e));          \
        PGLZ_HistEntry** __myhsp = &(_hs)[__hindex];       \
        PGLZ_HistEntry* __myhe = &(_he)[_hn];              \
        if (_recycle) {                                    \
            if (__myhe->prev == NULL)                      \
                (_hs)[__myhe->hindex] = __myhe->next;      \
            else                                           \
                __myhe->prev->next = __myhe->next;         \
            if (__myhe->next != NULL)                      \
                __myhe->next->prev = __myhe->prev;         \
        }                                                  \
        __myhe->next = *__myhsp;                           \
        __myhe->prev = NULL;                               \
        __myhe->hindex = __hindex;                         \
        __myhe->pos = (_s);                                \
        if (*__myhsp != NULL)                              \
            (*__myhsp)->prev = __myhe;                     \
        *__myhsp = __myhe;                                 \
        if (++(_hn) >= PGLZ_HISTORY_SIZE) {                \
            (_hn) = 0;                                     \
            (_recycle) = true;                             \
        }                                                  \
    } while (0)
/* ----------
 * pglz_out_ctrl -
 *
 *		Outputs the last and allocates a new control byte if needed.
 *
 * When the bit cursor __ctrl has wrapped to 0 (all 8 bits used, or nothing
 * emitted yet), the finished control byte __ctrlb is stored through
 * __ctrlp, the next output byte is reserved as the new control byte, and
 * __ctrlb / __ctrl are reset to 0 / 1.  Otherwise nothing happens.
 * __ctrlp, __ctrlb, __ctrl and __buf are all modified in place.
 * ----------
 */
#define pglz_out_ctrl(__ctrlp, __ctrlb, __ctrl, __buf) \
    do {                                               \
        if ((((unsigned char)(__ctrl)) & 0xff) == 0) { \
            *(__ctrlp) = __ctrlb;                      \
            __ctrlp = (__buf)++;                       \
            __ctrlb = 0;                               \
            __ctrl = 1;                                \
        }                                              \
    } while (0)
/* ----------
 * pglz_out_literal -
 *
 *		Outputs a literal byte to the destination buffer including the
 *		appropriate control bit.
 *
 * The control bit for a literal stays unset; only the bit cursor _ctrl
 * advances.
 * ----------
 */
#define pglz_out_literal(_ctrlp, _ctrlb, _ctrl, _buf, _byte) \
    do {                                                     \
        pglz_out_ctrl(_ctrlp, _ctrlb, _ctrl, _buf);          \
        *(_buf)++ = (unsigned char)(_byte);                  \
        (_ctrl) <<= 1;                                       \
    } while (0)
/* ----------
 * pglz_out_tag -
 *
 *		Outputs a backward reference tag of 2-3 bytes (depending on
 *		length) to the destination buffer including the
 *		appropriate control bit.
 *
 * Byte 0 carries the upper 4 offset bits in its high nibble and either
 * (_len - 3) or the escape value 0x0f in its low nibble; byte 1 carries the
 * lower 8 offset bits.  For lengths above 17 a third byte holds
 * (_len - 18).
 * ----------
 */
#define pglz_out_tag(_ctrlp, _ctrlb, _ctrl, _buf, _len, _off)                                 \
    do {                                                                                      \
        pglz_out_ctrl(_ctrlp, _ctrlb, _ctrl, _buf);                                           \
        (_ctrlb) |= (_ctrl);                                                                  \
        (_ctrl) <<= 1;                                                                        \
        if ((_len) > 17) {                                                                    \
            (_buf)[0] = (unsigned char)((((uint32)(_off)&0xf00) >> 4) | 0x0f);                \
            (_buf)[1] = (unsigned char)(((uint32)(_off)&0xff));                               \
            (_buf)[2] = (unsigned char)((_len)-18);                                           \
            (_buf) += 3;                                                                      \
        } else {                                                                              \
            (_buf)[0] = (unsigned char)((((uint32)(_off)&0xf00) >> 4) | ((uint32)(_len)-3)); \
            (_buf)[1] = (unsigned char)((uint32)(_off)&0xff);                                 \
            (_buf) += 2;                                                                      \
        }                                                                                     \
    } while (0)
// Number of bytes of hist_start that the compressors clear (memset_s) before each run.
#define HIST_START_LEN (sizeof(PGLZ_HistEntry*) * PGLZ_HISTORY_LISTS)
// Capacity of the hist_start array.
// NOTE(review): HIST_START_LEN is derived from PGLZ_HISTORY_LISTS, not from this
// constant; PGLZ_HISTORY_LISTS must not exceed 8192 — confirm against the header.
#define PGLZ_MAX_HISTORY_LISTS 8192 /* must be power of 2 */
// Heads of the history lists, indexed by the value of pglz_hist_idx().
// NOTE(review): file-scope mutable state used by every compress call; looks
// non-reentrant — confirm callers never compress concurrently.
static PGLZ_HistEntry* hist_start[PGLZ_MAX_HISTORY_LISTS];
// Pool of history entries handed out (and recycled) by pglz_hist_add().
static PGLZ_HistEntry hist_entries[PGLZ_HISTORY_SIZE + 1];
/* ----------
* pglz_find_match -
* Lookup the history table if the actual input stream matches
* another sequence of characters, starting somewhere earlier
* in the input buffer.
* ----------
static inline int pglz_find_match(
PGLZ_HistEntry** hstart, const char* input, const char* end, int* lenp, int* offp, int good_match, int good_drop)
PGLZ_HistEntry* hent = NULL;
int32 len = 0;
int32 off = 0;
* Traverse the linked history list until a good enough match is found.
hent = hstart[pglz_hist_idx(input, end)];
while (hent != NULL) {
const char* ip = input;
const char* hp = hent->pos;
int32 thisoff;
int32 thislen;
* Stop if the offset does not fit into our tag anymore.
thisoff = ip - hp;
if (thisoff >= 0x0fff)
* Determine length of match. A better match must be larger than the
* best so far. And if we already have a match of 16 or more bytes,
* it's worth the call overhead to use memcmp() to check if this match
* is equal for the same size. After that we must fallback to
* character by character comparison to know the exact position where
* the diff occurred.
thislen = 0;
if (len >= 16) {
if (memcmp(ip, hp, len) == 0) {
thislen = len;
ip += len;
hp += len;
while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) {
} else {
while (ip < end && *ip == *hp && thislen < PGLZ_MAX_MATCH) {
* Remember this match as the best (if it is)
if (thislen > len) {
len = thislen;
off = thisoff;
* Advance to the next history entry
hent = hent->next;
* Be happy with lesser good matches the more entries we visited. But
* no point in doing calculation if we're at end of list.
if (hent != NULL) {
if (len >= good_match)
good_match -= (good_match * good_drop) / 100;
* Return match information only if it results at least in one byte
* reduction.
if (len > 2) {
*lenp = len;
*offp = off;
return 1;
return 0;
/* ----------
* pglz_compress -
* Compresses source into dest using strategy.
* ----------
bool pglz_compress(const char* source, int32 slen, PGLZ_Header* dest, const PGLZ_Strategy* strategy)
unsigned char* bp = ((unsigned char*)dest) + sizeof(PGLZ_Header);
unsigned char* bstart = bp;
int hist_next = 0;
bool hist_recycle = false;
const char* dp = source;
const char* dend = source + slen;
unsigned char ctrl_dummy = 0;
unsigned char* ctrlp = &ctrl_dummy;
unsigned char ctrlb = 0;
unsigned char ctrl = 0;
bool found_match = false;
int32 match_len;
int32 match_off;
int32 good_match;
int32 good_drop;
int32 result_size;
int32 result_max;
int32 need_rate;
* Our fallback strategy is the default.
if (strategy == NULL)
strategy = PGLZ_strategy_default;
* If the strategy forbids compression (at all or if source chunk size out
* of range), fail.
if (strategy->match_size_good <= 0 || slen < strategy->min_input_size || slen > strategy->max_input_size)
return false;
* Save the original source size in the header.
dest->rawsize = slen;
* Limit the match parameters to the supported range.
good_match = strategy->match_size_good;
if (good_match > PGLZ_MAX_MATCH)
good_match = PGLZ_MAX_MATCH;
else if (good_match < 17)
good_match = 17;
good_drop = strategy->match_size_drop;
if (good_drop < 0)
good_drop = 0;
else if (good_drop > 100)
good_drop = 100;
need_rate = strategy->min_comp_rate;
if (need_rate < 0)
need_rate = 0;
else if (need_rate > 99)
need_rate = 99;
* Compute the maximum result size allowed by the strategy, namely the
* input size minus the minimum wanted compression rate. This had better
* be <= slen, else we might overrun the provided output buffer.
if (slen > (INT_MAX / 100)) {
/* Approximate to avoid overflow */
result_max = (slen / 100) * (100 - need_rate);
} else
result_max = (slen * (100 - need_rate)) / 100;
* Initialize the history lists to empty. We do not need to zero the
* hist_entries[] array; its entries are initialized as they are used.
errno_t rc = memset_s(hist_start, HIST_START_LEN, 0, HIST_START_LEN);
securec_check(rc, "\0", "\0");
* Compress the source directly into the output buffer.
while (dp < dend) {
* If we already exceeded the maximum result size, fail.
* We check once per loop; since the loop body could emit as many as 4
* bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better
* allow 4 slop bytes.
if (bp - bstart >= result_max)
return false;
* If we've emitted more than first_success_by bytes without finding
* anything compressible at all, fail. This lets us fall out
* reasonably quickly when looking at incompressible input (such as
* pre-compressed data).
if (!found_match && bp - bstart >= strategy->first_success_by)
return false;
* Try to find a match in the history
if (pglz_find_match(hist_start, dp, dend, &match_len, &match_off, good_match, good_drop)) {
* Create the tag and add history entries for all matched
* characters.
pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
while (match_len--) {
hist_start, hist_entries, hist_next, hist_recycle, dp, dend);
dp++; /* Do not do this ++ in the line above! */
/* The macro would do it four times - Jan. */
found_match = true;
} else {
* No match found. Copy one literal byte.
pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
hist_start, hist_entries, hist_next, hist_recycle, dp, dend);
dp++; /* Do not do this ++ in the line above! */
/* The macro would do it four times - Jan. */
* Write out the last control byte and check that we haven't overrun the
* output size allowed by the strategy.
*ctrlp = ctrlb;
result_size = bp - bstart;
if (result_size >= result_max)
return false;
* Success - need only fill in the actual length of the compressed datum.
SET_VARSIZE_COMPRESSED(dest, result_size + sizeof(PGLZ_Header));
return true;
/* ----------
* lz_compress -
* Compresses source into dest using strategy. Returns the number of
* bytes written in buffer dest, or -1 if compression fails.
* ----------
int32 lz_compress(const char* source, int32 slen, char* dest)
unsigned char* bp = (unsigned char*) dest;
unsigned char* bstart = bp;
int hist_next = 0;
bool hist_recycle = false;
const char* dp = source;
const char* dend = source + slen;
unsigned char ctrl_dummy = 0;
unsigned char* ctrlp = &ctrl_dummy;
unsigned char ctrlb = 0;
unsigned char ctrl = 0;
bool found_match = false;
int32 match_len;
int32 match_off;
int32 good_match;
int32 good_drop;
int32 result_size;
int32 result_max;
int32 need_rate;
errno_t rc;
const PGLZ_Strategy* strategy = PGLZ_strategy_always;
* Our fallback strategy is the default.
if (strategy == NULL) {
strategy = PGLZ_strategy_default;
* If the strategy forbids compression (at all or if source chunk size out
* of range), fail.
if (strategy->match_size_good <= 0 || slen < strategy->min_input_size || slen > strategy->max_input_size) {
return -1;
* Limit the match parameters to the supported range.
good_match = strategy->match_size_good;
if (good_match > PGLZ_MAX_MATCH) {
good_match = PGLZ_MAX_MATCH;
} else if (good_match < 17) {
good_match = 17;
good_drop = strategy->match_size_drop;
if (good_drop < 0) {
good_drop = 0;
} else if (good_drop > 100) {
good_drop = 100;
need_rate = strategy->min_comp_rate;
if (need_rate < 0) {
need_rate = 0;
} else if (need_rate > 99) {
need_rate = 99;
* Compute the maximum result size allowed by the strategy, namely the
* input size minus the minimum wanted compression rate. This had better
* be <= slen, else we might overrun the provided output buffer.
if (slen > (INT_MAX / 100)) {
/* Approximate to avoid overflow */
result_max = (slen / 100) * (100 - need_rate);
} else {
result_max = (slen * (100 - need_rate)) / 100;
* Initialize the history lists to empty. We do not need to zero the
* hist_entries[] array; its entries are initialized as they are used.
rc = memset_s(hist_start, HIST_START_LEN, 0, HIST_START_LEN);
securec_check(rc, "\0", "\0");
* Compress the source directly into the output buffer.
while (dp < dend) {
* If we already exceeded the maximum result size, fail.
* We check once per loop; since the loop body could emit as many as 4
* bytes (a control byte and 3-byte tag), PGLZ_MAX_OUTPUT() had better
* allow 4 slop bytes.
if (bp - bstart >= result_max) {
return -1;
* If we've emitted more than first_success_by bytes without finding
* anything compressible at all, fail. This lets us fall out
* reasonably quickly when looking at incompressible input (such as
* pre-compressed data).
if (!found_match && bp - bstart >= strategy->first_success_by) {
return -1;
* Try to find a match in the history
if (pglz_find_match(hist_start, dp, dend, &match_len, &match_off, good_match, good_drop)) {
* Create the tag and add history entries for all matched
* characters.
pglz_out_tag(ctrlp, ctrlb, ctrl, bp, match_len, match_off);
while (match_len--) {
hist_start, hist_entries, hist_next, hist_recycle, dp,
dp++; /* Do not do this ++ in the line above! */
/* The macro would do it four times - Jan. */
found_match = true;
} else {
* No match found. Copy one literal byte.
pglz_out_literal(ctrlp, ctrlb, ctrl, bp, *dp);
hist_start, hist_entries, hist_next, hist_recycle, dp, dend);
dp++; /* Do not do this ++ in the line above! */
/* The macro would do it four times - Jan. */
* Write out the last control byte and check that we haven't overrun the
* output size allowed by the strategy.
*ctrlp = ctrlb;
result_size = bp - bstart;
if (result_size >= result_max) {
return -1;
/* success */
return result_size;
/* ----------
* pglz_decompress -
* Decompresses source into dest. Returns the number of bytes
* decompressed in the destination buffer, and *optionally*
* checks that both the source and dest buffers have been
* fully read and written to, respectively.
* ----------
int32 lz_decompress(const char* source, int32 slen, char* dest, int32 rawsize, bool check_complete)
const unsigned char* sp;
const unsigned char* srcend;
unsigned char* dp;
unsigned char* destend;
errno_t rc = 0;
sp = (const unsigned char*) source;
srcend = ((const unsigned char*) source) + slen;
dp = (unsigned char*) dest;
destend = dp + rawsize;
while (sp < srcend && dp < destend) {
* Read one control byte and process the next 8 items (or as many as
* remain in the compressed input).
unsigned char ctrl = *sp++;
int ctrlc;
for (ctrlc = 0; ctrlc < 8 && sp < srcend && dp < destend; ctrlc++) {
if (ctrl & 1) {
* Set control bit means we must read a match tag. The match
* is coded with two bytes. First byte uses lower nibble to
* code length - 3. Higher nibble contains upper 4 bits of the
* offset. The next following byte contains the lower 8 bits
* of the offset. If the length is coded as 18, another
* extension tag byte tells how much longer the match really
* was (0-255).
int32 len;
int32 off;
len = (sp[0] & 0x0f) + 3;
off = ((sp[0] & 0xf0) << 4) | sp[1];
sp += 2;
if (len == 18) {
len += *sp++;
* Now we copy the bytes specified by the tag from OUTPUT to
* OUTPUT (copy len bytes from dp - off to dp). The copied
* areas could overlap, to preven possible uncertainty, we
* copy only non-overlapping regions.
len = Min(len, destend - dp);
while (off < len) {
* When offset is smaller than length - source and
* destination regions overlap. memmove() is resolving
* this overlap in an incompatible way with pglz. Thus we
* resort to memcpy()-ing non-overlapping regions.
* Consider input: 112341234123412341234
* At byte 5 here ^ we have match with length 16 and
* offset 4. 11234M(len=16, off=4)
* We are decoding first period of match and rewrite match
* 112341234M(len=12, off=8)
* The same match is now at position 9, it points to the
* same start byte of output, but from another position:
* the offset is doubled.
* We iterate through this offset growth until we can
* proceed to usual memcpy(). If we would try to decode
* the match at byte 5 (len=16, off=4) by memmove() we
* would issue memmove(5, 1, 16) which would produce
* 112341234XXXXXXXXXXXX, where series of X is 12
* undefined bytes, that were at bytes [5:17].
* ---------
errno_t rc = memcpy_s(dp, off + 1, dp - off, off);
securec_check(rc, "", "");
len -= off;
dp += off;
off += off;
rc = memcpy_s(dp, len + 1, dp - off, len);
securec_check(rc, "", "");
dp += len;
} else {
* An unset control bit means LITERAL BYTE. So we just copy
* one from INPUT to OUTPUT.
*dp++ = *sp++;
* Advance the control bit
ctrl >>= 1;
* Check we decompressed the right amount. If we are slicing, then we
* won't necessarily be at the end of the source or dest buffers when we
* hit a stop, so we don't test them.
if (check_complete && (dp != destend || sp != srcend)) {
return -1;
* That's it.
return (char*) dp - dest;
int CompressPage(const char* src, char* dst, int dst_size, RelFileCompressOption option)
if (PageIs8BXidHeapVersion(src)) {
return TemplateCompressPage<true>(src, dst, dst_size, option);
} else {
return TemplateCompressPage<false>(src, dst, dst_size, option);
/*
 * CompressConvertRows -- reshape the tuple area byte-wise before
 * compression.
 *
 * The rows are visited in real_order (ascending lp_off) and written to the
 * scratch buffer as byte 0 of every row, then byte 1 of every row, and so
 * on up to max_row_len; rows shorter than the current byte index are
 * skipped.  The reshaped bytes then replace [pd_upper, pd_special).
 *
 * buf:          page, modified in place.
 * aux_buf:      scratch buffer of BLCKSZ bytes (cleared here).
 * real_order:   item-id indexes ordered by ascending lp_off; the first
 *               real_row_cnt entries are read.
 * max_row_len:  length of the longest row.
 * real_row_cnt: number of rows to reshape.
 */
void CompressConvertRows(char *buf, char *aux_buf, int16 *real_order, uint16 max_row_len, uint16 real_row_cnt) {
    errno_t ret;
    HeapPageHeaderData *page = (HeapPageHeaderData *)buf;
    uint16 row_cnt = real_row_cnt;
    uint32 total_size = page->pd_special - page->pd_upper;
    char *copy_begin = buf + page->pd_upper;
    char *row;
    uint16 i, j, k, cur, up, row_size;

    ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ);
    securec_check(ret, "", "");

    k = 0;
    for (i = 0; i < max_row_len; i++) {
        for (j = 0; j < row_cnt; j++) {
            /* a row ends where the next row (by offset) begins; the last one ends at pd_special */
            up = (j == (row_cnt - 1)) ? page->pd_special : GET_ITEMID_BY_IDX(buf, (real_order[j + 1]))->lp_off;
            cur = GET_ITEMID_BY_IDX(buf, (real_order[j]))->lp_off;
            row_size = up - cur;
            row = buf + cur;
            if (i < row_size) {
                aux_buf[k++] = row[i]; // this part is reshaped
            }
        }
    }

    if (k != total_size) {
        printf("ERROR!!! convert_rows_2 error...!!!\n");
        /* the rows did not cover the tuple area exactly: leave the page untouched */
        return;
    }

    // cp aux_buf to page_buf
    ret = memcpy_sp(copy_begin, total_size, aux_buf, total_size);
    securec_check(ret, "", "");
    return ;
}
void CompressConvertItemIds(char *buf, char *aux_buf) {
errno_t ret;
HeapPageHeaderData *page = (HeapPageHeaderData *)buf;
uint16 row_cnt = (page->pd_lower - GetPageHeaderSize(page)) / sizeof(ItemIdData);
uint32 total_size = row_cnt * sizeof(ItemIdData);
char *copy_begin = buf + GetPageHeaderSize(page);
uint16 i, j, k;
// clear aux_buf
ret = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ);
securec_check(ret, "", "");
k = 0;
for (i = 0; i < row_cnt; i++) {
for (j = 0; j < sizeof(ItemIdData); j++) {
aux_buf[j * row_cnt + i] = copy_begin[k++];
// cp aux_buf to page_buf
ret = memcpy_sp(copy_begin, total_size, aux_buf, total_size);
securec_check(ret, "", "");
return ;
void cprs_diff_convert_rows(char *buf, uint32 offset,uint16 min_row_len, uint16 real_row_cnt) {
uint16 row_cnt = real_row_cnt;
uint32 common_size = min_row_len;
uint8 *copy_begin = (uint8 *)(buf + offset);
uint16 i, j;
for (i = 0; i < common_size; i++) {
for (j = row_cnt - 1; j > 0; j--) {
copy_begin[i * row_cnt + j] -= copy_begin[i * row_cnt + (j - 1)];
return ;
// CompressConvertOnePage() -- run the byte-regrouping conversion on one page.
//
//   buf          - page buffer, converted in place
//   aux_buf      - scratch buffer handed to the convert steps
//   diff_convert - when true, also difference-encode the regrouped data
// Returns false, without running any convert step, when CompressConvertCheck
// rejects the page; returns true after the conversion has been applied.
bool CompressConvertOnePage(char *buf, char *aux_buf, bool diff_convert) {
uint16 max_row_len = 0;
uint16 min_row_len = 0;
int16 *real_order = NULL; // itemids are not in order sometimes. we must find the real
uint16 real_row_cnt = 0;
// CompressConvertCheck fills real_order, the row-length bounds and the row count.
if (!CompressConvertCheck(buf, &real_order, &max_row_len, &min_row_len, &real_row_cnt)) {
// NOTE(review): real_order is presumably released in this branch -- confirm.
if (real_order != NULL) {
return false;
// Regroup the tuple area first, then the item-id array; both reuse aux_buf as scratch.
CompressConvertRows(buf, aux_buf, real_order, max_row_len, real_row_cnt);
CompressConvertItemIds(buf, aux_buf);
if (diff_convert) {
// Tuple area: min_row_len groups of real_row_cnt bytes, starting at pd_upper.
cprs_diff_convert_rows(buf, ((HeapPageHeaderData *)buf)->pd_upper, min_row_len, real_row_cnt);
// Item-id array: sizeof(ItemIdData) groups, one byte per item id, starting after the page header.
cprs_diff_convert_rows(buf, GetPageHeaderSize(buf), sizeof(ItemIdData),
(((HeapPageHeaderData *)buf)->pd_lower - GetPageHeaderSize(buf)) / sizeof(ItemIdData));
// NOTE(review): real_order is presumably released in this branch -- confirm.
if (real_order != NULL) {
return true;
// CompressPagePrepareConvert() -- allocate a scratch buffer and try to convert one page.
//
//   src              - page buffer, converted in place by CompressConvertOnePage
//   diff_convert     - forwarded to CompressConvertOnePage
//   real_ByteConvert - out: set to true only if CompressConvertOnePage returned true,
//                      otherwise set to false
void CompressPagePrepareConvert(char *src, bool diff_convert, bool *real_ByteConvert)
char *aux_buf = NULL;
errno_t rc;
// Scratch buffer of one block, used by the convert steps.
aux_buf = (char *)malloc(BLCKSZ);
// NOTE(review): the allocation-failure branch presumably returns before
// *real_ByteConvert is written; the caller would then rely on its own
// initial value -- confirm.
if (aux_buf == NULL) {
// add log
rc = memset_sp(aux_buf, BLCKSZ, 0, BLCKSZ);
securec_check(rc, "", "");
// do convert
*real_ByteConvert = false;
if (CompressConvertOnePage(src, aux_buf, diff_convert)) {
*real_ByteConvert = true;
// NOTE(review): aux_buf is presumably released in this branch -- confirm.
if (aux_buf != NULL) {
* CompressPage() -- Compress one page.
* Only the parts other than the page header will be compressed. The
* compressed data is rounded up to a multiple of chunk_size; the unused part
* is filled with zeros. Compression needs to save at least one chunk of
* space, otherwise it fails.
* This function returns the size of the compressed data, or
* -1 if compression fails,
* COMPRESS_UNSUPPORTED_ERROR for an unrecognized compression algorithm.
// TemplateCompressPage() -- compress everything after the page header of src into dst.
//
//   heapPageData - compile-time flag; selects HeapPageCompressData or
//                  PageCompressData as the layout of dst, and the header size
//                  returned by GetSizeOfHeadData
//   src          - source page; never modified here
//   dst          - destination; receives a copy of the page header, the
//                  compressed size, the convert flags and the compressed bytes
//   dst_size     - passed to ZSTD_compress as the destination capacity
//   option       - algorithm, level and convert switches
// Returns SIZE_OF_PAGE_COMPRESS_DATA_HEADER_DATA(heapPageData) plus the
// compressed size, or -1 on the failure paths visible below.
template<bool heapPageData>
int TemplateCompressPage(const char* src, char* dst, int dst_size, RelFileCompressOption option)
int compressed_size;
// compressLevelSymbol chooses the sign applied to compressLevel.
int8 level = option.compressLevelSymbol ? option.compressLevel : -option.compressLevel;
size_t sizeOfHeaderData = GetSizeOfHeadData(heapPageData);
char *src_copy = NULL;
bool real_ByteConvert = false;
errno_t rc;
char* data;
if (option.byteConvert) {
// Work on a private copy of the page: the conversion rewrites it in place.
src_copy = (char *)malloc(BLCKSZ);
if (src_copy == NULL) {
// add log
return -1;
rc = memcpy_s(src_copy, BLCKSZ, src, BLCKSZ);
securec_check(rc, "", "");
CompressPagePrepareConvert(src_copy, option.diffConvert, &real_ByteConvert); /* preprocess convert src */
// data points at the payload area inside dst, according to the chosen layout.
if (heapPageData) {
data = ((HeapPageCompressData*)dst)->data;
} else {
data = ((PageCompressData*)dst)->data;
// In every branch below the converted copy is the input only when the
// conversion actually took place (real_ByteConvert); otherwise src is used.
switch (option.compressAlgorithm) {
if (real_ByteConvert) {
compressed_size = lz_compress(src_copy + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, data);
} else {
compressed_size = lz_compress(src + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, data);
// Level is 0 or outside [MIN_ZSTD_COMPRESSION_LEVEL, MAX_ZSTD_COMPRESSION_LEVEL].
// NOTE(review): presumably a default level is substituted here -- confirm.
if (level == 0 || level < MIN_ZSTD_COMPRESSION_LEVEL || level > MAX_ZSTD_COMPRESSION_LEVEL) {
// NOTE(review): dst_size is given as the capacity of data, but data starts
// inside dst after the compress-data header -- confirm that callers leave
// room for that header.
if (real_ByteConvert) {
compressed_size = ZSTD_compress(data, dst_size, src_copy + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, level);
} else {
compressed_size = ZSTD_compress(data, dst_size, src + sizeOfHeaderData, BLCKSZ - sizeOfHeaderData, level);
// ZSTD_compress reports failure through an error code that ZSTD_isError recognizes.
if (ZSTD_isError(compressed_size)) {
if (src_copy != NULL) {
return -1;
if (src_copy != NULL) {
// A negative size is treated as a compression failure.
if (compressed_size < 0) {
if (src_copy != NULL) {
return -1;
// Fill in the compress-data header. The page header is taken from the
// untouched src, not from the converted copy.
if (heapPageData) {
HeapPageCompressData* pcdptr = ((HeapPageCompressData*)dst);
rc = memcpy_s(pcdptr->page_header, sizeOfHeaderData, src, sizeOfHeaderData);
securec_check(rc, "", "");
pcdptr->size = compressed_size;
// byte_convert records whether the conversion really happened;
// diff_convert records the requested option as-is.
pcdptr->byte_convert = real_ByteConvert;
pcdptr->diff_convert = option.diffConvert;
} else {
PageCompressData* pcdptr = ((PageCompressData*)dst);
rc = memcpy_s(pcdptr->page_header, sizeOfHeaderData, src, sizeOfHeaderData);
securec_check(rc, "", "");
pcdptr->size = compressed_size;
pcdptr->byte_convert = real_ByteConvert;
pcdptr->diff_convert = option.diffConvert;
if (src_copy != NULL) {
return SIZE_OF_PAGE_COMPRESS_DATA_HEADER_DATA(heapPageData) + compressed_size;
* CompressPageBufferBound()
* -- Get the destination buffer bound for compressing one page.
* Returns the destination buffer size needed to compress one page, or
* -1 for an unrecognized compression algorithm.
// CompressPageBufferBound() -- destination buffer size needed to compress one page.
//
//   page      - page buffer; only used to obtain the page header size
//   algorithm - compression algorithm selector
// NOTE(review): the case labels are presumably the PGLZ and ZSTD algorithm
// constants, in that order, with -1 as the default -- confirm.
int CompressPageBufferBound(const char* page, uint8 algorithm)
switch (algorithm) {
// Fixed bound: one block plus 4 bytes.
return BLCKSZ + 4;
// Bound reported by zstd for the page body (block size minus page header).
return ZSTD_compressBound(BLCKSZ - GetPageHeaderSize(page));
return -1;