foundationdb/fdbclient/Tuple.cpp

265 lines
6.9 KiB
C++

/*
* Tuple.cpp
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "fdbclient/Tuple.h"
// Scans the body of a tuple-encoded string for its terminator.
//
// `offset` is the index of the first byte after the string's type code. Inside the body a
// 0x00 byte followed by 0xff is an escaped NUL and is skipped as a pair; a 0x00 followed by
// any other byte is the terminator.
//
// Returns the index at which the scan stopped: the terminating 0x00 when one is found,
// otherwise the first index for which no following byte exists (the last byte of `data`,
// or one past it when the final two bytes form an escape pair). If `offset` already has no
// following byte, `offset` is returned unchanged.
static size_t find_string_terminator(const StringRef data, size_t offset) {
    // Read the size into an unsigned local once and test `i + 1 < size` instead of
    // `i < size - 1`, so that an empty buffer cannot wrap around and let the scan run
    // out of bounds.
    const size_t size = data.size();
    size_t i = offset;
    while (i + 1 < size) {
        if (data[i] != '\x00') {
            ++i; // ordinary byte
            continue;
        }
        if (data[i + 1] != (uint8_t)'\xff') {
            break; // unescaped 0x00: this is the terminator
        }
        i += 2; // escaped NUL (0x00 0xff): skip both bytes
    }
    return i;
}
// Builds a tuple from its packed byte form: copies `str` into this tuple's storage and
// records the starting offset of every encoded element.
//
// Recognised leading bytes are 0x00 (null), 0x01 / 0x02 (strings) and 0x0c..0x1c
// (integers); any other leading byte throws invalid_tuple_data_type.
//
// When `exclude_incomplete` is true and the last element's encoded length runs past the
// end of the input, that element's offset is dropped again.
Tuple::Tuple(StringRef const& str, bool exclude_incomplete) {
    data.append(data.arena(), str.begin(), str.size());

    size_t pos = 0;
    while (pos < data.size()) {
        offsets.push_back(pos);
        const uint8_t typeCode = data[pos];

        if (typeCode == '\x00') {
            // Null: the type code is the whole element.
            pos += 1;
        } else if (typeCode == '\x01' || typeCode == '\x02') {
            // String: continue just past wherever the terminator scan stopped.
            pos = find_string_terminator(str, pos + 1) + 1;
        } else if (typeCode >= '\x0c' && typeCode <= '\x1c') {
            // Integer: the distance of the code from 0x14 is the payload length in bytes.
            pos += abs(typeCode - '\x14') + 1;
        } else {
            throw invalid_tuple_data_type();
        }
    }

    // pos can only exceed the buffer if the final element claimed more bytes than were
    // available; callers asking to exclude incomplete elements do not get that element.
    if (exclude_incomplete && pos > data.size()) {
        offsets.pop_back();
    }
}
// Convenience factory: parses the packed bytes in `str` into a Tuple by forwarding both
// arguments to the parsing constructor.
Tuple Tuple::unpack(StringRef const& str, bool exclude_incomplete) {
    Tuple parsed(str, exclude_incomplete);
    return parsed;
}
// Appends every element of `tuple` to this tuple and returns *this.
Tuple& Tuple::append(Tuple const& tuple) {
    // The incoming offsets are relative to the start of `tuple`'s data; shift them by the
    // number of bytes already stored here before the bytes themselves are copied in.
    const size_t base = data.size();
    for (size_t relative : tuple.offsets) {
        offsets.push_back(relative + base);
    }

    data.append(data.arena(), tuple.data.begin(), tuple.data.size());
    return *this;
}
// Appends `str` as a string element and returns *this.
//
// The element is written as a type code (0x02 when `utf8` is true, 0x01 otherwise), the
// bytes of `str` with every 0x00 replaced by the pair 0x00 0xff, and a closing 0x00.
Tuple& Tuple::append(StringRef const& str, bool utf8) {
    offsets.push_back(data.size());

    const uint8_t typeCode = uint8_t(utf8 ? '\x02' : '\x01');
    data.append(data.arena(), &typeCode, 1);

    // Copy the input in runs that contain no NUL, emitting an escape pair between runs.
    size_t runStart = 0;
    size_t cursor = 0;
    while (cursor < str.size()) {
        if (str[cursor] == '\x00') {
            data.append(data.arena(), str.begin() + runStart, cursor - runStart);
            data.push_back(data.arena(), (uint8_t)'\x00');
            data.push_back(data.arena(), (uint8_t)'\xff');
            runStart = cursor + 1;
        }
        ++cursor;
    }

    // Whatever follows the last NUL (possibly nothing), then the terminator.
    data.append(data.arena(), str.begin() + runStart, str.size() - runStart);
    data.push_back(data.arena(), (uint8_t)'\x00');
    return *this;
}
// Appends `value` as an integer element and returns *this.
//
// Zero is written as the single byte 0x14. Any other value is written as a type code
// followed by the minimal big-endian payload: the code is 0x14 + n for a positive value
// and 0x14 - n for a negative one, where n (1..8) is the payload length in bytes.
// Negative values are stored as the one's complement of their magnitude.
Tuple& Tuple::append( int64_t value ) {
    offsets.push_back( data.size() );

    const bool neg = value < 0;

    // The one's complement of the magnitude, ~(-value), equals value - 1. Doing the
    // subtraction on the unsigned representation yields the same bit pattern while staying
    // well defined for INT64_MIN, where negating the signed value would overflow.
    const uint64_t encoded = neg ? uint64_t(value) - 1 : uint64_t(value);
    const uint64_t swap = bigEndian64(encoded);
    const uint8_t* bytes = (const uint8_t*)&swap;

    // Leading bytes equal to the sign padding carry no information and are dropped.
    const uint8_t padding = neg ? 255 : 0;
    for ( int i = 0; i < 8; i++ ) {
        if ( bytes[i] != padding ) {
            const int len = 8 - i;
            data.push_back( data.arena(), (uint8_t)(neg ? 20 - len : 20 + len) );
            data.append( data.arena(), bytes + i, len );
            return *this;
        }
    }

    // Every byte was padding, which only happens for zero.
    data.push_back( data.arena(), (uint8_t)'\x14' );
    return *this;
}
// Appends a null element (the single byte 0x00) and returns *this.
Tuple& Tuple::appendNull() {
    const uint8_t nullCode = (uint8_t)'\x00';
    offsets.push_back(data.size());
    data.push_back(data.arena(), nullCode);
    return *this;
}
// Returns the type of the element at `index`, determined from its leading type code.
//
// Throws invalid_tuple_index if `index` is not a valid element position, and
// invalid_tuple_data_type if the type code is not one this class recognises.
Tuple::ElementType Tuple::getType(size_t index) const {
    if (index >= offsets.size()) {
        throw invalid_tuple_index();
    }

    const uint8_t code = data[offsets[index]];
    switch (code) {
    case '\x00':
        return ElementType::NULL_TYPE;
    case '\x01':
        return ElementType::BYTES;
    case '\x02':
        return ElementType::UTF8;
    default:
        break;
    }

    // Integer codes occupy a contiguous range rather than a single value.
    if (code >= '\x0c' && code <= '\x1c') {
        return ElementType::INT;
    }
    throw invalid_tuple_data_type();
}
// Returns the decoded contents of the string element at `index`.
//
// Throws invalid_tuple_index if `index` is not a valid element position, and
// invalid_tuple_data_type if the element's type code is neither 0x01 nor 0x02.
// Escape pairs (0x00 0xff) in the encoded body are turned back into a single 0x00 and the
// closing 0x00 is dropped. The returned bytes live in the result's own arena.
Standalone<StringRef> Tuple::getString(size_t index) const {
    if (index >= offsets.size()) {
        throw invalid_tuple_index();
    }

    const uint8_t code = data[offsets[index]];
    if (code != '\x01' && code != '\x02') {
        throw invalid_tuple_data_type();
    }

    // The encoded body runs from just after the type code up to the next element's offset,
    // or to the end of the data for the last element.
    const bool hasNext = offsets.size() > index + 1;
    const size_t limit = hasNext ? offsets[index + 1] : data.size();

    Standalone<StringRef> result;
    VectorRef<uint8_t> staging;

    size_t runStart = offsets[index] + 1;
    size_t cursor = runStart;
    while (cursor < limit) {
        if (data[cursor] != '\x00') {
            ++cursor;
            continue;
        }

        // Flush the literal bytes collected before this 0x00.
        staging.append(result.arena(), data.begin() + runStart, cursor - runStart);

        // If another byte follows inside the body, the 0x00 was the first half of an
        // escape pair and stands for a literal NUL; otherwise it was the terminator.
        const size_t follower = cursor + 1;
        if (follower < limit) {
            staging.push_back(result.arena(), '\x00');
        }

        // Resume after the byte that followed the 0x00.
        runStart = follower + 1;
        cursor = follower + 1;
    }

    // Trailing literal bytes, present when the body ended without a terminator.
    if (runStart < limit) {
        staging.append(result.arena(), data.begin() + runStart, limit - runStart);
    }

    result.StringRef::operator=(StringRef(staging.begin(), staging.size()));
    return result;
}
// Returns the value of the integer element at `index`.
//
// Throws invalid_tuple_index if `index` is not a valid element position, and
// invalid_tuple_data_type if the element's type code is outside 0x0c..0x1c.
//
// The type code's distance from 0x14 gives the payload length in bytes, and codes below
// 0x14 denote negative values. If fewer payload bytes are present than the type code
// announces (the encoded tuple was truncated), the call asserts unless `allow_incomplete`
// is true, in which case the missing low-order bytes are filled in as described below.
int64_t Tuple::getInt(size_t index, bool allow_incomplete) const {
    if(index >= offsets.size()) {
        throw invalid_tuple_index();
    }

    ASSERT(offsets[index] < data.size());
    const size_t codePos = offsets[index];
    const uint8_t code = data[codePos];
    if(code < '\x0c' || code > '\x1c') {
        throw invalid_tuple_data_type();
    }

    int len = code - '\x14';
    const bool neg = len < 0;
    if ( neg ) {
        len = -len;
    }

    // Big-endian image of the value, assembled byte by byte. It is kept unsigned so the
    // final sign adjustment cannot overflow.
    uint64_t swap;
    uint8_t* swapBytes = (uint8_t*)&swap;

    // Sign-extend the high-order bytes that are not part of the payload.
    memset( swapBytes, neg ? '\xff' : 0, 8 - len );

    // presentLen is how many of the len payload bytes are actually present; it is < len if
    // the encoded tuple was truncated. The comparison is done in size_t: narrowing the
    // remaining byte count to int8_t would misreport it whenever 128 or more bytes follow
    // the type code, failing the assertion below on perfectly valid tuples.
    const size_t available = data.size() - codePos - 1;
    const int presentLen = (int)std::min<size_t>( (size_t)len, available );
    ASSERT(len == presentLen || allow_incomplete);

    memcpy( swapBytes + 8 - len, data.begin() + codePos + 1, presentLen );
    if(presentLen < len) {
        int suffix = len - presentLen;
        if(presentLen == 0) {
            // The first byte of an int would always be at least 1, because if it was 0 then a shorter int type
            // would have been used. So if the first (most significant) byte is missing from the encoded string,
            // use 1 so that the decoded result maintains the encoded form's sort order with an encoded value of
            // a shorter and same-signed type.
            *( swapBytes + 8 - len ) = 1;
            --suffix; // The suffix to clear below is now 1 byte shorter.
        }
        memset( swapBytes + 8 - suffix, 0, suffix );
    }

    swap = bigEndian64( swap );
    if ( neg ) {
        // Negative values are stored as the one's complement of their magnitude, so the
        // value is -(~stored), which equals stored + 1. The unsigned addition is well
        // defined even for the encoding of INT64_MIN, where negating would overflow.
        swap += 1;
    }
    return (int64_t)swap;
}
// Builds a key range from this tuple's bytes followed by the packed bytes of `tuple`:
// the begin key is that concatenation plus 0x00, and the end key is the same
// concatenation plus 0xff. Both keys are allocated in the returned range's arena.
KeyRange Tuple::range(Tuple const& tuple) const {
    KeyRange keyRange;

    // Both bounds share the same prefix and differ only in their final byte.
    const auto suffix = tuple.pack();
    const auto boundLength = data.size() + suffix.size() + 1;

    VectorRef<uint8_t> lower;
    lower.reserve(keyRange.arena(), boundLength);
    lower.append(keyRange.arena(), data.begin(), data.size());
    lower.append(keyRange.arena(), suffix.begin(), suffix.size());
    lower.push_back(keyRange.arena(), uint8_t('\x00'));

    VectorRef<uint8_t> upper;
    upper.reserve(keyRange.arena(), boundLength);
    upper.append(keyRange.arena(), data.begin(), data.size());
    upper.append(keyRange.arena(), suffix.begin(), suffix.size());
    upper.push_back(keyRange.arena(), uint8_t('\xff'));

    const StringRef beginKey(lower.begin(), lower.size());
    const StringRef endKey(upper.begin(), upper.size());
    keyRange.KeyRangeRef::operator=(KeyRangeRef(beginKey, endKey));
    return keyRange;
}
// Returns a new tuple holding the elements in positions [start, end).
//
// An empty tuple is returned when `start` is not a valid element position or the range is
// empty. An `end` at or beyond the element count means "through the last element".
Tuple Tuple::subTuple(size_t start, size_t end) const {
    const size_t count = offsets.size();
    if (end <= start || start >= count) {
        return Tuple();
    }

    // Slice the packed bytes between the two element boundaries and re-parse them.
    const size_t firstByte = offsets[start];
    const size_t lastByte = (end < count) ? offsets[end] : data.size();
    return Tuple(StringRef(data.begin() + firstByte, lastByte - firstByte));
}