/*
 * CompressedInt.h
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2013-2022 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <stdint.h>

// A signed compressed integer format that retains ordering in compressed form.
// Format is: [~sign_bit] [unary_len] [value_bits]
// If the leading (sign) bit of the encoding is 0, i.e. the value is negative,
// then all other bits are inverted to maintain sort order in encoded form.
//
// Examples (shown for non-negative values, whose leading bit is 1).
// Legend: 0/1 = literal header bit, n = value bit, N{k} = k further value bytes.
//
//   Encoded layout                              bits (w/o sign)   Total encoded value length
//   10nn nnnn                                     6 bits            1 byte
//   110n nnnn N{1}                               13 bits            2 bytes
//   1110 nnnn N{2}                               20 bits            3 bytes
//   1111 0nnn N{3}                               27 bits            4 bytes
//   1111 10nn N{4}                               34 bits            5 bytes
//   1111 110n N{5}                               41 bits            6 bytes
//   1111 1110 N{6}                               48 bits            7 bytes
//   1111 1111 0nnn nnnn N{6}                     55 bits            8 bytes
//   1111 1111 10nn nnnn N{7}                     62 bits            9 bytes
//   1111 1111 110n nnnn N{8}                     69 bits           10 bytes
//   1111 1111 1110 nnnn N{9}                     76 bits           11 bytes
//   1111 1111 1111 0nnn N{10}                    83 bits           12 bytes
//   1111 1111 1111 10nn N{11}                    90 bits           13 bytes
//   1111 1111 1111 110n N{12}                    97 bits           14 bytes
//   1111 1111 1111 1110 N{13}                   104 bits           15 bytes
//   1111 1111 1111 1111 0nnn nnnn N{13}         111 bits           16 bytes
//   1111 1111 1111 1111 10nn nnnn N{14}         118 bits           17 bytes
//   1111 1111 1111 1111 110n nnnn N{15}         125 bits           18 bytes
// Wrapper around an integer of type IntType whose serialize() reads or writes
// a variable-length byte encoding of `value`.
//
// Encoded layout, most significant bit first:
//   - 1 sign bit: 1 for a non-negative value, 0 for a negative value
//   - a unary length: (encodedLen - 1) one-bits followed by a single zero-bit
//   - the value bits, right-aligned in the remaining space
// For a negative value the bits of ~value are encoded and then every encoded
// byte (header included) is bit-inverted.
template <typename IntType>
struct CompressedInt {
    // Intentionally non-explicit so an IntType converts implicitly.
    CompressedInt(IntType i = 0) : value(i) {}

    // The decoded integer; read when serializing, overwritten when deserializing.
    IntType value;

    // Reads `value` from `ar` when ar.isDeserializing is true, otherwise
    // writes the encoded form of `value` to `ar`.
    //
    // Ar must provide a member `isDeserializing`, work with the free function
    // `serializer(ar, uint8_t&)` for reading single bytes, and provide
    // `serializeBytes(ptr, len)` for writing.
    template <class Ar>
    void serialize(Ar& ar) {
        if (ar.isDeserializing) {
            uint8_t b;
            serializer(ar, b);

            int bytesToRead = 0; // Additional bytes to read after the required first byte
            const bool positive = (b & 0x80) != 0; // Sign bit
            if (!positive)
                b = static_cast<uint8_t>(~b); // Negative, so invert bytes read
            b &= 0x7f; // Clear sign bit

            uint8_t hb = 0x40; // Next header bit to test
            // Scan the unary length bits, which may span multiple bytes
            while (true) {
                if (hb == 0) { // All bits of the current byte were header bits
                    serializer(ar, b); // Read the next header byte
                    if (!positive)
                        b = static_cast<uint8_t>(~b); // Negative, so invert bytes read

                    hb = 0x80; // Restart testing at the top bit of the new byte
                    --bytesToRead; // The byte just read was one of the counted bytes
                }
                if ((b & hb) == 0) // A 0 terminates the unary sequence
                    break;
                ++bytesToRead; // Each 1 means one more byte in the encoding
                b = static_cast<uint8_t>(b & ~hb); // Clear the header bit just tested
                hb >>= 1; // Move to the next lower bit
            }

            // With the header bits cleared, b holds the most significant value bits
            value = b;
            while (bytesToRead-- != 0) {
                serializer(ar, b); // Read byte
                if (!positive)
                    b = static_cast<uint8_t>(~b); // Negative, so invert bytes read
                value <<= 8; // Make room for the new byte
                value |= b; // OR the byte into place
            }

            if (!positive) // The decoded bits are ~value for negatives; undo that
                value = ~value;
        } else {
            uint8_t buf[sizeof(IntType) * 2];
            // Signed copy of the buffer length so index comparisons below do
            // not mix signed and unsigned operands.
            const int bufLen = static_cast<int>(sizeof(buf));

            int iv = bufLen; // Index of the most recently written value byte
            const bool neg = value < 0;
            IntType v = neg ? ~value : value; // Encode ~value for negatives

            // Write the value bytes at the end of buf, least significant byte
            // last, stopping once only zero bytes remain
            while (v) {
                buf[--iv] = static_cast<uint8_t>(v);
                v >>= 8;
            }

            int bitlen = (bufLen - iv) * 8; // Value bits written so far
            if (bitlen != 0) { // Discount leading 0 bits of the highest value byte
                uint8_t top = buf[iv]; // Nonzero, because the loop above stops at zero
                while (!(top & 0x80)) {
                    --bitlen;
                    top <<= 1;
                }
            }

            // Each encoded byte carries 7 payload bits on average, except that
            // one bit overall is taken by the sign, hence bitlen / 7 + 1.
            const int encodedLen = bitlen / 7 + 1;
            const int iStart = bufLen - encodedLen; // Index of the first encoded byte

            // Zero the leading bytes that hold no value bits yet
            for (int i = iStart; i < iv; ++i)
                buf[i] = 0;

            // Set the sign bit plus (encodedLen - 1) unary length bits to 1.
            // The 0 bit that terminates the unary run is already clear.
            int ih = iStart;
            uint8_t bit = 0x80;
            for (int remaining = encodedLen; remaining > 0; --remaining) {
                if (bit == 0) { // Header continues into the next byte
                    ++ih;
                    bit = 0x80;
                }
                buf[ih] |= bit;
                bit >>= 1;
            }

            if (neg) { // Negative: invert every encoded byte
                for (int i = iStart; i < bufLen; ++i)
                    buf[i] = static_cast<uint8_t>(~buf[i]);
            }

            ar.serializeBytes(buf + iStart, encodedLen);
        }
    }
};