pack Tuples with a single byte array allocation of the right size

This commit is contained in:
Alec Grieser 2019-02-25 21:59:16 -08:00
parent a74dfa5487
commit 663d750e1d
No known key found for this signature in database
GPG Key ID: CAF63551C60D3462
8 changed files with 547 additions and 237 deletions

View File

@ -20,7 +20,6 @@
package com.apple.foundationdb.tuple; package com.apple.foundationdb.tuple;
import java.math.BigInteger;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.ByteOrder; import java.nio.ByteOrder;
import java.util.Arrays; import java.util.Arrays;
@ -154,7 +153,10 @@ public class ByteArrayUtil {
* @return a newly created array where {@code pattern} replaced with {@code replacement} * @return a newly created array where {@code pattern} replaced with {@code replacement}
*/ */
public static byte[] replace(byte[] src, byte[] pattern, byte[] replacement) { public static byte[] replace(byte[] src, byte[] pattern, byte[] replacement) {
return join(replacement, split(src, pattern)); if(src == null) {
return null;
}
return replace(src, 0, src.length, pattern, replacement);
} }
/** /**
@ -171,7 +173,69 @@ public class ByteArrayUtil {
*/ */
public static byte[] replace(byte[] src, int offset, int length, public static byte[] replace(byte[] src, int offset, int length,
byte[] pattern, byte[] replacement) { byte[] pattern, byte[] replacement) {
return join(replacement, split(src, offset, length, pattern)); if(pattern == null || pattern.length == 0) {
return Arrays.copyOfRange(src, offset, offset + length);
}
ByteBuffer dest;
if(replacement == null || replacement.length != pattern.length) {
// Array might change size. This is the "tricky" case.
byte patternFirst = pattern[0];
int patternOccurrences = 0;
int currentPosition = offset;
while(currentPosition < offset + length) {
if(src[currentPosition] == patternFirst && regionEquals(src, currentPosition, pattern)) {
patternOccurrences++;
currentPosition += pattern.length;
}
else {
currentPosition++;
}
}
if(patternOccurrences == 0) {
// Pattern doesn't occur. Just return a copy of the needed region.
return Arrays.copyOfRange(src, offset, offset + length);
}
int replacementLength = (replacement == null) ? 0 : replacement.length;
int newLength = length + patternOccurrences * (replacementLength - pattern.length);
if(newLength == 0) {
return new byte[0];
}
else {
dest = ByteBuffer.allocate(newLength);
}
}
else {
// No matter what, the array will stay the same size as replacement.length = pattern.length
dest = ByteBuffer.allocate(length);
}
replace(src, offset, length, pattern, replacement, dest);
return dest.array();
}
static void replace(byte[] src, int offset, int length, byte[] pattern, byte[] replacement, ByteBuffer dest) {
if(pattern == null || pattern.length == 0) {
dest.put(src, offset, length);
return;
}
byte patternFirst = pattern[0];
int lastPosition = offset;
int currentPosition = offset;
while(currentPosition < offset + length) {
if(src[currentPosition] == patternFirst && regionEquals(src, currentPosition, pattern)) {
dest.put(src, lastPosition, currentPosition - lastPosition);
if(replacement != null) {
dest.put(replacement);
}
currentPosition += pattern.length;
lastPosition = currentPosition;
}
else {
currentPosition++;
}
}
dest.put(src, lastPosition, currentPosition - lastPosition);
} }
/** /**
@ -203,7 +267,7 @@ public class ByteArrayUtil {
* @return a list of byte arrays from {@code src} now not containing {@code delimiter} * @return a list of byte arrays from {@code src} now not containing {@code delimiter}
*/ */
public static List<byte[]> split(byte[] src, int offset, int length, byte[] delimiter) { public static List<byte[]> split(byte[] src, int offset, int length, byte[] delimiter) {
List<byte[]> parts = new LinkedList<byte[]>(); List<byte[]> parts = new LinkedList<>();
int idx = offset; int idx = offset;
int lastSplitEnd = offset; int lastSplitEnd = offset;
while(idx <= (offset+length) - delimiter.length) { while(idx <= (offset+length) - delimiter.length) {
@ -225,13 +289,6 @@ public class ByteArrayUtil {
return parts; return parts;
} }
static int bisectLeft(BigInteger[] arr, BigInteger i) {
int n = Arrays.binarySearch(arr, i);
if(n >= 0)
return n;
return (n + 1) * -1;
}
/** /**
* Compare byte arrays for equality and ordering purposes. Elements in the array * Compare byte arrays for equality and ordering purposes. Elements in the array
* are interpreted and compared as unsigned bytes. Neither parameter * are interpreted and compared as unsigned bytes. Neither parameter
@ -276,61 +333,6 @@ public class ByteArrayUtil {
return true; return true;
} }
/**
* Scan through an array of bytes to find the first occurrence of a specific value.
*
* @param src array to scan. Must not be {@code null}.
* @param what the value for which to search.
* @param start the index at which to start the search. If this is at or after
* the end of {@code src}, the result will always be {@code -1}.
* @param end the index one past the last entry at which to search
*
* @return return the location of the first instance of {@code value}, or
* {@code -1} if not found.
*/
static int findNext(byte[] src, byte what, int start, int end) {
for(int i = start; i < end; i++) {
if(src[i] == what)
return i;
}
return -1;
}
/**
* Gets the index of the first element after the next occurrence of the byte sequence [nm]
* @param v the bytes to scan through
* @param n first character to find
* @param m second character to find
* @param start the index at which to start the scan
*
* @return the index after the next occurrence of [nm]
*/
static int findTerminator(byte[] v, byte n, byte m, int start) {
return findTerminator(v, n, m, start, v.length);
}
/**
* Gets the index of the first element after the next occurrence of the byte sequence [nm]
* @param v the bytes to scan through
* @param n first character to find
* @param m second character to find
* @param start the index at which to start the scan
* @param end the index at which to stop the search (exclusive)
*
* @return the index after the next occurrence of [nm]
*/
static int findTerminator(byte[] v, byte n, byte m, int start, int end) {
int pos = start;
while(true) {
pos = findNext(v, n, pos, end);
if(pos < 0)
return end;
if(pos + 1 == end || v[pos+1] != m)
return pos;
pos += 2;
}
}
/** /**
* Computes the first key that would sort outside the range prefixed by {@code key}. * Computes the first key that would sort outside the range prefixed by {@code key}.
* {@code key} must be non-null, and contain at least some character this is not * {@code key} must be non-null, and contain at least some character this is not
@ -417,5 +419,14 @@ public class ByteArrayUtil {
return s.toString(); return s.toString();
} }
static int nullCount(byte[] val) {
int nulls = 0;
for(int i = 0; i < val.length; i++) {
if(val[i] == 0x00)
nulls += 1;
}
return nulls;
}
private ByteArrayUtil() {} private ByteArrayUtil() {}
} }

View File

@ -71,5 +71,48 @@ final class StringUtil {
return Character.compare(c1, c2); return Character.compare(c1, c2);
} }
static int packedSize(String s) {
final int strLength = s.length();
int size = 0;
int pos = 0;
while(pos < strLength) {
char c = s.charAt(pos);
if(c == '\0') {
// Null is encoded as \x00\xff
size += 2;
}
else if(c <= 0x7f) {
// ASCII code point. Only 1 byte.
size += 1;
}
else if(c <= 0x07ff) {
// 2 byte code point
size += 2;
}
else if(Character.isHighSurrogate(c)) {
if(pos + 1 < s.length() && Character.isLowSurrogate(s.charAt(pos + 1))) {
// High surrogate followed by low surrogate means the code point
// is between U+10000 and U+10FFFF, so it requires 4 bytes.
size += 4;
pos += 1;
}
else {
throw new IllegalArgumentException("malformed UTF-16 has high surrogate not followed by low surrogate");
}
}
else if(Character.isLowSurrogate(c)) {
throw new IllegalArgumentException("malformed UTF-16 has low surrogate without prior high surrogate");
}
else {
// 3 byte code point
size += 3;
}
pos += 1;
}
return size;
}
private StringUtil() {} private StringUtil() {}
} }

View File

@ -73,6 +73,7 @@ public class Tuple implements Comparable<Tuple>, Iterable<Object> {
private List<Object> elements; private List<Object> elements;
private int memoizedHash = 0; private int memoizedHash = 0;
private byte[] packed = null; private byte[] packed = null;
private int memoizedPackedSize = -1;
private Tuple(List<? extends Object> elements, Object newItem) { private Tuple(List<? extends Object> elements, Object newItem) {
this(elements); this(elements);
@ -83,12 +84,6 @@ public class Tuple implements Comparable<Tuple>, Iterable<Object> {
this.elements = new ArrayList<>(elements); this.elements = new ArrayList<>(elements);
} }
private enum VersionstampExpectations {
UNKNOWN,
HAS_INCOMPLETE,
HAS_NO_INCOMPLETE
}
/** /**
* Creates a copy of this {@code Tuple} with an appended last element. The parameter * Creates a copy of this {@code Tuple} with an appended last element. The parameter
* is untyped but only {@link String}, {@code byte[]}, {@link Number}s, {@link UUID}s, * is untyped but only {@link String}, {@code byte[]}, {@link Number}s, {@link UUID}s,
@ -313,13 +308,15 @@ public class Tuple implements Comparable<Tuple>, Iterable<Object> {
byte[] packInternal(byte[] prefix, boolean copy) { byte[] packInternal(byte[] prefix, boolean copy) {
boolean hasPrefix = prefix != null && prefix.length > 1; boolean hasPrefix = prefix != null && prefix.length > 1;
if(packed == null) { if(packed == null) {
byte[] result = TupleUtil.pack(elements, prefix); byte[] result = TupleUtil.pack(elements, prefix, getPackedSize());
if(hasPrefix) { if(hasPrefix) {
packed = Arrays.copyOfRange(result, prefix.length, result.length); packed = Arrays.copyOfRange(result, prefix.length, result.length);
memoizedPackedSize = packed.length;
return result; return result;
} }
else { else {
packed = result; packed = result;
memoizedPackedSize = packed.length;
} }
} }
if(hasPrefix) { if(hasPrefix) {
@ -366,21 +363,23 @@ public class Tuple implements Comparable<Tuple>, Iterable<Object> {
* @throws IllegalArgumentException if there is not exactly one incomplete {@link Versionstamp} included in this {@code Tuple} * @throws IllegalArgumentException if there is not exactly one incomplete {@link Versionstamp} included in this {@code Tuple}
*/ */
public byte[] packWithVersionstamp(byte[] prefix) { public byte[] packWithVersionstamp(byte[] prefix) {
return TupleUtil.packWithVersionstamp(elements, prefix); return TupleUtil.packWithVersionstamp(elements, prefix, getPackedSize());
} }
byte[] packWithVersionstampInternal(byte[] prefix, boolean copy) { byte[] packWithVersionstampInternal(byte[] prefix, boolean copy) {
boolean hasPrefix = prefix != null && prefix.length > 0; boolean hasPrefix = prefix != null && prefix.length > 0;
if(packed == null) { if(packed == null) {
byte[] result = TupleUtil.packWithVersionstamp(elements, prefix); byte[] result = TupleUtil.packWithVersionstamp(elements, prefix, getPackedSize());
if(hasPrefix) { if(hasPrefix) {
byte[] withoutPrefix = Arrays.copyOfRange(result, prefix.length, result.length); byte[] withoutPrefix = Arrays.copyOfRange(result, prefix.length, result.length);
TupleUtil.adjustVersionPosition(packed, -1 * prefix.length); TupleUtil.adjustVersionPosition(packed, -1 * prefix.length);
packed = withoutPrefix; packed = withoutPrefix;
memoizedPackedSize = packed.length;
return result; return result;
} }
else { else {
packed = result; packed = result;
memoizedPackedSize = packed.length;
} }
} }
if(hasPrefix) { if(hasPrefix) {
@ -398,13 +397,13 @@ public class Tuple implements Comparable<Tuple>, Iterable<Object> {
} }
} }
byte[] packMaybeVersionstamp(byte[] prefix) { byte[] packMaybeVersionstamp() {
if(packed == null) { if(packed == null) {
if(hasIncompleteVersionstamp()) { if(hasIncompleteVersionstamp()) {
return packWithVersionstampInternal(prefix, false); return packWithVersionstampInternal(null, false);
} }
else { else {
return packInternal(prefix, false); return packInternal(null, false);
} }
} }
else { else {
@ -489,6 +488,7 @@ public class Tuple implements Comparable<Tuple>, Iterable<Object> {
Tuple t = new Tuple(); Tuple t = new Tuple();
t.elements = TupleUtil.unpack(bytes, offset, length); t.elements = TupleUtil.unpack(bytes, offset, length);
t.packed = Arrays.copyOfRange(bytes, offset, offset + length); t.packed = Arrays.copyOfRange(bytes, offset, offset + length);
t.memoizedPackedSize = length;
return t; return t;
} }
@ -727,11 +727,14 @@ public class Tuple implements Comparable<Tuple>, Iterable<Object> {
Object o = this.elements.get(index); Object o = this.elements.get(index);
if(o == null) { if(o == null) {
return null; return null;
} else if(o instanceof Tuple) { }
else if(o instanceof Tuple) {
return (Tuple)o; return (Tuple)o;
} else if(o instanceof List<?>) { }
return Tuple.fromItems((List<? extends Object>)o); else if(o instanceof List<?>) {
} else { return Tuple.fromItems((List<?>)o);
}
else {
throw new ClassCastException("Cannot convert item of type " + o.getClass() + " to tuple"); throw new ClassCastException("Cannot convert item of type " + o.getClass() + " to tuple");
} }
} }
@ -824,16 +827,23 @@ public class Tuple implements Comparable<Tuple>, Iterable<Object> {
} }
/** /**
* Get the number of bytes in the packed representation of this {@code Tuple}. Note that at the * Get the number of bytes in the packed representation of this {@code Tuple}. This is done by summing
* moment, this number is calculated by packing the {@code Tuple} and looking at its size. This method * the serialized sizes of all of the elements of this {@code Tuple} and does not pack everything
* will memoize the result, however, so asking the same {@code Tuple} for its size multiple times * into a single {@code Tuple}. The return value of this function is stored within this {@code Tuple}
* is a fast operation. * after this function has been called so that subsequent calls on the same object are fast. This method
* does not validate that there is no more than one incomplete {@link Versionstamp} in this {@code Tuple}.
* *
* @return the number of bytes in the packed representation of this {@code Tuple} * @return the number of bytes in the packed representation of this {@code Tuple}
*/ */
public int getPackedSize() { public int getPackedSize() {
byte[] p = packMaybeVersionstamp(null); if(memoizedPackedSize < 0) {
return p.length; memoizedPackedSize = getPackedSize(false);
}
return memoizedPackedSize;
}
int getPackedSize(boolean nested) {
return TupleUtil.getPackedSize(elements, nested);
} }
/** /**
@ -871,7 +881,7 @@ public class Tuple implements Comparable<Tuple>, Iterable<Object> {
@Override @Override
public int hashCode() { public int hashCode() {
if(memoizedHash == 0) { if(memoizedHash == 0) {
memoizedHash = Arrays.hashCode(packMaybeVersionstamp(null)); memoizedHash = Arrays.hashCode(packMaybeVersionstamp());
} }
return memoizedHash; return memoizedHash;
} }

View File

@ -36,11 +36,10 @@ import com.apple.foundationdb.FDB;
class TupleUtil { class TupleUtil {
private static final byte nil = 0x00; private static final byte nil = 0x00;
private static final BigInteger[] BIG_INT_SIZE_LIMITS; private static final Charset UTF8 = Charset.forName("UTF-8");
private static final Charset UTF8;
private static final BigInteger LONG_MIN_VALUE = BigInteger.valueOf(Long.MIN_VALUE); private static final BigInteger LONG_MIN_VALUE = BigInteger.valueOf(Long.MIN_VALUE);
private static final BigInteger LONG_MAX_VALUE = BigInteger.valueOf(Long.MAX_VALUE); private static final BigInteger LONG_MAX_VALUE = BigInteger.valueOf(Long.MAX_VALUE);
private static final IterableComparator iterableComparator; private static final IterableComparator iterableComparator = new IterableComparator();
private static final byte BYTES_CODE = 0x01; private static final byte BYTES_CODE = 0x01;
private static final byte STRING_CODE = 0x02; private static final byte STRING_CODE = 0x02;
@ -57,26 +56,11 @@ class TupleUtil {
private static final byte[] NULL_ARR = new byte[] {nil}; private static final byte[] NULL_ARR = new byte[] {nil};
private static final byte[] NULL_ESCAPED_ARR = new byte[] {nil, (byte)0xFF}; private static final byte[] NULL_ESCAPED_ARR = new byte[] {nil, (byte)0xFF};
private static final byte[] BYTES_ARR = new byte[]{BYTES_CODE};
private static final byte[] STRING_ARR = new byte[]{STRING_CODE};
private static final byte[] NESTED_ARR = new byte[]{NESTED_CODE};
private static final byte[] INT_ZERO_ARR = new byte[]{INT_ZERO_CODE};
private static final byte[] FALSE_ARR = new byte[]{FALSE_CODE};
private static final byte[] TRUE_ARR = new byte[]{TRUE_CODE};
private static final byte[] VERSIONSTAMP_ARR = new byte[]{VERSIONSTAMP_CODE};
static {
BIG_INT_SIZE_LIMITS = new BigInteger[9];
for(int i = 0; i < BIG_INT_SIZE_LIMITS.length; i++) {
BIG_INT_SIZE_LIMITS[i] = (BigInteger.ONE).shiftLeft(i * 8).subtract(BigInteger.ONE);
}
UTF8 = Charset.forName("UTF-8");
iterableComparator = new IterableComparator();
}
static class DecodeState { static class DecodeState {
final List<Object> values; final List<Object> values;
int end; int end;
int nullCount; // Basically a hack to allow findTerminator to return the terminator and null count
DecodeState() { DecodeState() {
values = new ArrayList<>(); values = new ArrayList<>();
@ -87,15 +71,36 @@ class TupleUtil {
values.add(value); values.add(value);
this.end = end; this.end = end;
} }
int findNullTerminator(byte[] bytes, int from, int to) {
nullCount = 0;
int x = from;
while(x < to) {
if(bytes[x] == 0x00) {
if(x + 1 >= to || bytes[x + 1] != (byte)0xFF) {
return x;
}
else {
nullCount++;
x += 2;
}
}
else {
x += 1;
}
}
throw new IllegalArgumentException("no terminator found for bytes starting at " + from);
}
} }
static class EncodeState { static class EncodeState {
final List<byte[]> encodedValues; final ByteBuffer encodedBytes;
int totalLength; int totalLength;
int versionPos; int versionPos;
EncodeState(int capacity) { EncodeState(ByteBuffer dest) {
this.encodedValues = new ArrayList<>(capacity); encodedBytes = dest;
encodedBytes.order(ByteOrder.BIG_ENDIAN);
totalLength = 0; totalLength = 0;
versionPos = -1; versionPos = -1;
} }
@ -104,25 +109,52 @@ class TupleUtil {
if(versionPos >= 0 && this.versionPos >= 0) { if(versionPos >= 0 && this.versionPos >= 0) {
throw new IllegalArgumentException("Multiple incomplete Versionstamps included in Tuple"); throw new IllegalArgumentException("Multiple incomplete Versionstamps included in Tuple");
} }
encodedValues.add(encoded); encodedBytes.put(encoded);
totalLength += encoded.length; totalLength += encoded.length;
this.versionPos = versionPos; this.versionPos = versionPos;
return this; return this;
} }
EncodeState add(byte[] encoded) { EncodeState add(byte[] encoded) {
encodedValues.add(encoded); encodedBytes.put(encoded);
totalLength += encoded.length; totalLength += encoded.length;
return this; return this;
} }
}
static int byteLength(byte[] bytes) { EncodeState add(byte[] encoded, int offset, int length) {
for(int i = 0; i < bytes.length; i++) { encodedBytes.put(encoded, offset, length);
if(bytes[i] == 0x00) continue; totalLength += length;
return bytes.length - i; return this;
}
EncodeState addNullEscaped(byte[] encoded) {
int nullCount = ByteArrayUtil.nullCount(encoded);
if(nullCount == 0) {
encodedBytes.put(encoded);
}
else {
ByteArrayUtil.replace(encoded, 0, encoded.length, NULL_ARR, NULL_ESCAPED_ARR, encodedBytes);
}
return this;
}
EncodeState add(byte b) {
encodedBytes.put(b);
totalLength++;
return this;
}
EncodeState add(int i) {
encodedBytes.putInt(i);
totalLength += Integer.BYTES;
return this;
}
EncodeState add(long l) {
encodedBytes.putLong(l);
totalLength += Long.BYTES;
return this;
} }
return 0;
} }
// These four functions are for adjusting the encoding of floating point numbers so // These four functions are for adjusting the encoding of floating point numbers so
@ -153,11 +185,16 @@ class TupleUtil {
return Double.longBitsToDouble(origBits); return Double.longBitsToDouble(origBits);
} }
// Get the number of bytes in the representation of a long. // Get the minimal number of bytes in the representation of a long.
static int byteCount(long i) { static int minimalByteCount(long i) {
return (Long.SIZE + 7 - Long.numberOfLeadingZeros(i >= 0 ? i : -i)) / 8; return (Long.SIZE + 7 - Long.numberOfLeadingZeros(i >= 0 ? i : -i)) / 8;
} }
static int minimalByteCount(BigInteger i) {
int bitLength = (i.compareTo(BigInteger.ZERO) >= 0) ? i.bitLength() : i.negate().bitLength();
return (bitLength + 7) / 8;
}
private static void adjustVersionPosition300(byte[] packed, int delta) { private static void adjustVersionPosition300(byte[] packed, int delta) {
int offsetOffset = packed.length - Short.BYTES; int offsetOffset = packed.length - Short.BYTES;
ByteBuffer buffer = ByteBuffer.wrap(packed, offsetOffset, Short.BYTES).order(ByteOrder.LITTLE_ENDIAN); ByteBuffer buffer = ByteBuffer.wrap(packed, offsetOffset, Short.BYTES).order(ByteOrder.LITTLE_ENDIAN);
@ -224,7 +261,7 @@ class TupleUtil {
state.add(NULL_ESCAPED_ARR); state.add(NULL_ESCAPED_ARR);
} }
else { else {
state.add(NULL_ARR); state.add(nil);
} }
} }
else if(t instanceof byte[]) else if(t instanceof byte[])
@ -258,133 +295,104 @@ class TupleUtil {
} }
static void encode(EncodeState state, byte[] bytes) { static void encode(EncodeState state, byte[] bytes) {
byte[] escaped = ByteArrayUtil.replace(bytes, NULL_ARR, NULL_ESCAPED_ARR); state.add(BYTES_CODE).addNullEscaped(bytes).add(nil);
state.add(BYTES_ARR).add(escaped).add(NULL_ARR);
} }
static void encode(EncodeState state, String s) { static void encode(EncodeState state, String s) {
byte[] escaped = ByteArrayUtil.replace(s.getBytes(UTF8), NULL_ARR, NULL_ESCAPED_ARR); byte[] bytes = s.getBytes(UTF8);
state.add(STRING_ARR).add(escaped).add(NULL_ARR); state.add(STRING_CODE).addNullEscaped(bytes).add(nil);
} }
static void encode(EncodeState state, BigInteger i) { static void encode(EncodeState state, BigInteger i) {
//System.out.println("Encoding integral " + i); //System.out.println("Encoding integral " + i);
if(i.equals(BigInteger.ZERO)) { if(i.equals(BigInteger.ZERO)) {
state.add(INT_ZERO_ARR); state.add(INT_ZERO_CODE);
return; return;
} }
byte[] bytes = i.toByteArray(); int n = minimalByteCount(i);
if(n > 0xff) {
throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)");
}
if(i.compareTo(BigInteger.ZERO) > 0) { if(i.compareTo(BigInteger.ZERO) > 0) {
if(i.compareTo(BIG_INT_SIZE_LIMITS[BIG_INT_SIZE_LIMITS.length-1]) > 0) { byte[] bytes = i.toByteArray();
int length = byteLength(bytes); if(n > Long.BYTES) {
if(length > 0xff) { state.add(POS_INT_END);
throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); state.add((byte)n);
} state.add(bytes, bytes.length - n, n);
byte[] intBytes = new byte[length + 2];
intBytes[0] = POS_INT_END;
intBytes[1] = (byte)(length);
System.arraycopy(bytes, bytes.length - length, intBytes, 2, length);
state.add(intBytes);
} }
else { else {
int n = ByteArrayUtil.bisectLeft(BIG_INT_SIZE_LIMITS, i);
assert n <= BIG_INT_SIZE_LIMITS.length;
//System.out.println(" -- integral has 'n' of " + n + " and output bytes of " + bytes.length); //System.out.println(" -- integral has 'n' of " + n + " and output bytes of " + bytes.length);
byte[] intBytes = new byte[n + 1]; state.add((byte)(INT_ZERO_CODE + n));
intBytes[0] = (byte) (INT_ZERO_CODE + n); state.add(bytes, bytes.length - n, n);
System.arraycopy(bytes, bytes.length - n, intBytes, 1, n);
state.add(intBytes);
} }
} }
else { else {
if(i.negate().compareTo(BIG_INT_SIZE_LIMITS[BIG_INT_SIZE_LIMITS.length - 1]) > 0) { byte[] bytes = i.subtract(BigInteger.ONE).toByteArray();
int length = byteLength(i.negate().toByteArray()); if(n > Long.BYTES) {
if (length > 0xff) { state.add(NEG_INT_START);
throw new IllegalArgumentException("BigInteger magnitude is too large (more than 255 bytes)"); state.add((byte)(n ^ 0xff));
if(bytes.length >= n) {
state.add(bytes, bytes.length - n, n);
} }
BigInteger offset = BigInteger.ONE.shiftLeft(length * 8).subtract(BigInteger.ONE); else {
byte[] adjusted = i.add(offset).toByteArray(); for(int x = 0; x < n - bytes.length; x++) {
byte[] intBytes = new byte[length + 2]; state.add((byte)0x00);
intBytes[0] = NEG_INT_START; }
intBytes[1] = (byte) (length ^ 0xff); state.add(bytes, 0, bytes.length);
if (adjusted.length >= length) {
System.arraycopy(adjusted, adjusted.length - length, intBytes, 2, length);
} else {
Arrays.fill(intBytes, 2, intBytes.length - adjusted.length, (byte) 0x00);
System.arraycopy(adjusted, 0, intBytes, intBytes.length - adjusted.length, adjusted.length);
} }
state.add(intBytes);
} }
else { else {
int n = ByteArrayUtil.bisectLeft(BIG_INT_SIZE_LIMITS, i.negate()); state.add((byte)(INT_ZERO_CODE - n));
if(bytes.length >= n) {
assert n >= 0 && n < BIG_INT_SIZE_LIMITS.length; // can we do this? it seems to be required for the following statement state.add(bytes, bytes.length - n, n);
}
long maxv = BIG_INT_SIZE_LIMITS[n].add(i).longValue(); else {
byte[] adjustedBytes = ByteBuffer.allocate(8).order(ByteOrder.BIG_ENDIAN).putLong(maxv).array(); for(int x = 0; x < n - bytes.length; x++) {
byte[] intBytes = new byte[n + 1]; state.add((byte)0x00);
intBytes[0] = (byte) (INT_ZERO_CODE - n); }
System.arraycopy(adjustedBytes, adjustedBytes.length - n, intBytes, 1, n); state.add(bytes, 0, bytes.length);
state.add(intBytes); }
} }
} }
} }
static void encode(EncodeState state, long i) { static void encode(EncodeState state, long i) {
if(i == 0L) { if(i == 0L) {
state.add(INT_ZERO_ARR); state.add(INT_ZERO_CODE);
return; return;
} }
int n = byteCount(i); int n = minimalByteCount(i);
byte[] intBytes = new byte[n + 1];
// First byte encodes number of bytes (as difference from INT_ZERO_CODE) // First byte encodes number of bytes (as difference from INT_ZERO_CODE)
intBytes[0] = (byte)(INT_ZERO_CODE + (i >= 0 ? n : -n)); state.add((byte)(INT_ZERO_CODE + (i >= 0 ? n : -n)));
// For positive integers, copy the bytes in big-endian order excluding leading 0x00 bytes. // For positive integers, copy the bytes in big-endian order excluding leading 0x00 bytes.
// For negative integers, copy the bytes of the one's complement representation excluding // For negative integers, copy the bytes of the one's complement representation excluding
// the leading 0xff bytes. As Java stores negative values in two's complement, we subtract 1 // the leading 0xff bytes. As Java stores negative values in two's complement, we subtract 1
// from negative values. // from negative values.
long val = Long.reverseBytes((i >= 0) ? i : (i - 1)) >> (Long.SIZE - 8 * n); long val = Long.reverseBytes((i >= 0) ? i : (i - 1)) >> (Long.SIZE - 8 * n);
for(int x = 1; x < intBytes.length; x++) { for(int x = 0; x < n; x++) {
intBytes[x] = (byte)(val & 0xff); state.add((byte)(val & 0xff));
val >>= 8; val >>= 8;
} }
state.add(intBytes);
} }
static void encode(EncodeState state, Float f) { static void encode(EncodeState state, Float f) {
byte[] floatBytes = ByteBuffer.allocate(1 + Float.BYTES).order(ByteOrder.BIG_ENDIAN) state.add(FLOAT_CODE).add(encodeFloatBits(f));
.put(FLOAT_CODE)
.putInt(encodeFloatBits(f))
.array();
state.add(floatBytes);
} }
static void encode(EncodeState state, Double d) { static void encode(EncodeState state, Double d) {
byte[] doubleBytes = ByteBuffer.allocate(1 + Double.BYTES).order(ByteOrder.BIG_ENDIAN) state.add(DOUBLE_CODE).add(encodeDoubleBits(d));
.put(DOUBLE_CODE)
.putLong(encodeDoubleBits(d))
.array();
state.add(doubleBytes);
} }
static void encode(EncodeState state, Boolean b) { static void encode(EncodeState state, Boolean b) {
if(b) { state.add(b ? TRUE_CODE : FALSE_CODE);
state.add(TRUE_ARR);
}
else {
state.add(FALSE_ARR);
}
} }
static void encode(EncodeState state, UUID uuid) { static void encode(EncodeState state, UUID uuid) {
byte[] uuidBytes = ByteBuffer.allocate(17).put(UUID_CODE).order(ByteOrder.BIG_ENDIAN) state.add(UUID_CODE).add(uuid.getMostSignificantBits()).add(uuid.getLeastSignificantBits());
.putLong(uuid.getMostSignificantBits()).putLong(uuid.getLeastSignificantBits())
.array();
state.add(uuidBytes);
} }
static void encode(EncodeState state, Versionstamp v) { static void encode(EncodeState state, Versionstamp v) {
state.add(VERSIONSTAMP_ARR); state.add(VERSIONSTAMP_CODE);
if(v.isComplete()) { if(v.isComplete()) {
state.add(v.getBytes()); state.add(v.getBytes());
} }
@ -394,11 +402,11 @@ class TupleUtil {
} }
static void encode(EncodeState state, List<?> value) { static void encode(EncodeState state, List<?> value) {
state.add(NESTED_ARR); state.add(NESTED_CODE);
for(Object t : value) { for(Object t : value) {
encode(state, t, true); encode(state, t, true);
} }
state.add(NULL_ARR); state.add(nil);
} }
static void decode(DecodeState state, byte[] rep, int pos, int last) { static void decode(DecodeState state, byte[] rep, int pos, int last) {
@ -411,17 +419,32 @@ class TupleUtil {
state.add(null, start); state.add(null, start);
} }
else if(code == BYTES_CODE) { else if(code == BYTES_CODE) {
int end = ByteArrayUtil.findTerminator(rep, (byte)0x0, (byte)0xff, start, last); int end = state.findNullTerminator(rep, start, last);
//System.out.println("End of byte string: " + end); //System.out.println("End of byte string: " + end);
byte[] range = ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, new byte[] { nil }); byte[] range;
if(state.nullCount == 0) {
range = Arrays.copyOfRange(rep, start, end);
}
else {
ByteBuffer dest = ByteBuffer.allocate(end - start - state.nullCount);
ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, NULL_ARR, dest);
range = dest.array();
}
//System.out.println(" -> byte string contents: '" + ArrayUtils.printable(range) + "'"); //System.out.println(" -> byte string contents: '" + ArrayUtils.printable(range) + "'");
state.add(range, end + 1); state.add(range, end + 1);
} }
else if(code == STRING_CODE) { else if(code == STRING_CODE) {
int end = ByteArrayUtil.findTerminator(rep, (byte)0x0, (byte)0xff, start, last); int end = state.findNullTerminator(rep, start, last);
//System.out.println("End of UTF8 string: " + end); //System.out.println("End of UTF8 string: " + end);
byte[] stringBytes = ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, new byte[] { nil }); String str;
String str = new String(stringBytes, UTF8); if(state.nullCount == 0) {
str = new String(rep, start, end - start, UTF8);
}
else {
ByteBuffer dest = ByteBuffer.allocate(end - start - state.nullCount);
ByteArrayUtil.replace(rep, start, end - start, NULL_ESCAPED_ARR, NULL_ARR, dest);
str = new String(dest.array(), UTF8);
}
//System.out.println(" -> UTF8 string contents: '" + str + "'"); //System.out.println(" -> UTF8 string contents: '" + str + "'");
state.add(str, end + 1); state.add(str, end + 1);
} }
@ -442,19 +465,23 @@ class TupleUtil {
state.add(true, start); state.add(true, start);
} }
else if(code == UUID_CODE) { else if(code == UUID_CODE) {
ByteBuffer bb = ByteBuffer.wrap(rep, start, 16).order(ByteOrder.BIG_ENDIAN); ByteBuffer bb = ByteBuffer.wrap(rep, start, 2 * Long.BYTES).order(ByteOrder.BIG_ENDIAN);
long msb = bb.getLong(); long msb = bb.getLong();
long lsb = bb.getLong(); long lsb = bb.getLong();
state.add(new UUID(msb, lsb), start + 16); state.add(new UUID(msb, lsb), start + 16);
} }
else if(code == POS_INT_END) { else if(code == POS_INT_END) {
int n = rep[start] & 0xff; int n = rep[start] & 0xff;
BigInteger res = new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1))); byte[] intBytes = new byte[n + 1];
System.arraycopy(rep, start + 1, intBytes, 1, n);
BigInteger res = new BigInteger(intBytes);
state.add(res, start + n + 1); state.add(res, start + n + 1);
} }
else if(code == NEG_INT_START) { else if(code == NEG_INT_START) {
int n = (rep[start] ^ 0xff) & 0xff; int n = (rep[start] ^ 0xff) & 0xff;
BigInteger origValue = new BigInteger(ByteArrayUtil.join(new byte[]{0x00}, Arrays.copyOfRange(rep, start+1, start+n+1))); byte[] intBytes = new byte[n + 1];
System.arraycopy(rep, start + 1, intBytes, 1, n);
BigInteger origValue = new BigInteger(intBytes);
BigInteger offset = BigInteger.ONE.shiftLeft(n*8).subtract(BigInteger.ONE); BigInteger offset = BigInteger.ONE.shiftLeft(n*8).subtract(BigInteger.ONE);
state.add(origValue.subtract(offset), start + n + 1); state.add(origValue.subtract(offset), start + n + 1);
} }
@ -464,7 +491,7 @@ class TupleUtil {
int n = positive ? code - INT_ZERO_CODE : INT_ZERO_CODE - code; int n = positive ? code - INT_ZERO_CODE : INT_ZERO_CODE - code;
int end = start + n; int end = start + n;
if(rep.length < end) { if(rep.length < last) {
throw new RuntimeException("Invalid tuple (possible truncation)"); throw new RuntimeException("Invalid tuple (possible truncation)");
} }
@ -509,9 +536,9 @@ class TupleUtil {
else if(code == NESTED_CODE) { else if(code == NESTED_CODE) {
DecodeState subResult = new DecodeState(); DecodeState subResult = new DecodeState();
int endPos = start; int endPos = start;
while(endPos < rep.length) { while(endPos < last) {
if(rep[endPos] == nil) { if(rep[endPos] == nil) {
if(endPos + 1 < rep.length && rep[endPos+1] == (byte)0xff) { if(endPos + 1 < last && rep[endPos+1] == (byte)0xff) {
subResult.add(null, endPos + 2); subResult.add(null, endPos + 2);
endPos += 2; endPos += 2;
} else { } else {
@ -631,19 +658,27 @@ class TupleUtil {
//System.out.println("Joining whole tuple..."); //System.out.println("Joining whole tuple...");
} }
static byte[] pack(List<Object> items, byte[] prefix) { static byte[] pack(List<Object> items, byte[] prefix, int expectedSize) {
EncodeState state = new EncodeState(2 * items.size() + (prefix == null ? 0 : 1)); ByteBuffer dest = ByteBuffer.allocate(expectedSize + (prefix != null ? prefix.length : 0));
EncodeState state = new EncodeState(dest);
if(prefix != null) {
state.add(prefix);
}
encodeAll(state, items, prefix); encodeAll(state, items, prefix);
if(state.versionPos >= 0) { if(state.versionPos >= 0) {
throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple packInternal"); throw new IllegalArgumentException("Incomplete Versionstamp included in vanilla tuple packInternal");
} }
else { else {
return ByteArrayUtil.join(null, state.encodedValues); return dest.array();
} }
} }
static byte[] packWithVersionstamp(List<Object> items, byte[] prefix) { static byte[] packWithVersionstamp(List<Object> items, byte[] prefix, int expectedSize) {
EncodeState state = new EncodeState(2 * items.size() + (prefix == null ? 1 : 2)); ByteBuffer dest = ByteBuffer.allocate(expectedSize + (prefix != null ? prefix.length : 0));
EncodeState state = new EncodeState(dest);
if(prefix != null) {
state.add(prefix);
}
encodeAll(state, items, prefix); encodeAll(state, items, prefix);
if(state.versionPos < 0) { if(state.versionPos < 0) {
throw new IllegalArgumentException("No incomplete Versionstamp included in tuple packInternal with versionstamp"); throw new IllegalArgumentException("No incomplete Versionstamp included in tuple packInternal with versionstamp");
@ -652,15 +687,73 @@ class TupleUtil {
if(state.versionPos > 0xffff) { if(state.versionPos > 0xffff) {
throw new IllegalArgumentException("Tuple has incomplete version at position " + state.versionPos + " which is greater than the maximum " + 0xffff); throw new IllegalArgumentException("Tuple has incomplete version at position " + state.versionPos + " which is greater than the maximum " + 0xffff);
} }
dest.order(ByteOrder.LITTLE_ENDIAN);
if (FDB.instance().getAPIVersion() < 520) { if (FDB.instance().getAPIVersion() < 520) {
state.add(ByteBuffer.allocate(Short.BYTES).order(ByteOrder.LITTLE_ENDIAN).putShort((short)state.versionPos).array()); dest.putShort((short)state.versionPos);
} else { } else {
state.add(ByteBuffer.allocate(Integer.BYTES).order(ByteOrder.LITTLE_ENDIAN).putInt(state.versionPos).array()); dest.putInt(state.versionPos);
} }
return ByteArrayUtil.join(null, state.encodedValues); return dest.array();
} }
} }
static int getPackedSize(List<?> items, boolean nested) {
int packedSize = 0;
for(Object item : items) {
if(item == null)
packedSize += nested ? 2 : 1;
else if(item instanceof byte[]) {
byte[] bytes = (byte[])item;
packedSize += 2 + bytes.length + ByteArrayUtil.nullCount((byte[])item);
}
else if(item instanceof String) {
try {
int strPackedSize = StringUtil.packedSize((String)item);
packedSize += 2 + strPackedSize;
}
catch (IllegalArgumentException e) {
// The unicode was malformed. Grab the array and count the bytes
byte[] strBytes = ((String)item).getBytes(UTF8);
packedSize += 2 + strBytes.length + ByteArrayUtil.nullCount(strBytes);
}
}
else if(item instanceof Float)
packedSize += 1 + Float.BYTES;
else if(item instanceof Double)
packedSize += 1 + Double.BYTES;
else if(item instanceof Boolean)
packedSize += 1;
else if(item instanceof UUID)
packedSize += 1 + 2 * Long.BYTES;
else if(item instanceof BigInteger) {
BigInteger bigInt = (BigInteger)item;
int byteCount = minimalByteCount(bigInt);
// If byteCount <= 8, then the encoding uses 1 byte for both the size
// and type code. If byteCount > 8, then there is 1 byte for the type code
// and 1 byte for the length. In both cases, the value is followed by
// the byte count.
packedSize += byteCount + ((byteCount <= 8) ? 1 : 2);
}
else if(item instanceof Number)
packedSize += 1 + minimalByteCount(((Number)item).longValue());
else if(item instanceof Versionstamp) {
packedSize += 1 + Versionstamp.LENGTH;
Versionstamp versionstamp = (Versionstamp)item;
if(!versionstamp.isComplete()) {
int suffixSize = FDB.instance().getAPIVersion() < 520 ? Short.BYTES : Integer.BYTES;
packedSize += suffixSize;
}
}
else if(item instanceof List<?>)
packedSize += 2 + getPackedSize((List<?>)item, true);
else if(item instanceof Tuple)
packedSize += 2 + ((Tuple)item).getPackedSize(true);
else
throw new IllegalArgumentException("unknown type " + item.getClass() + " for tuple packing");
}
return packedSize;
}
static boolean hasIncompleteVersionstamp(Stream<?> items) { static boolean hasIncompleteVersionstamp(Stream<?> items) {
return items.anyMatch(item -> { return items.anyMatch(item -> {
if(item == null) { if(item == null) {
@ -683,10 +776,10 @@ class TupleUtil {
public static void main(String[] args) { public static void main(String[] args) {
try { try {
byte[] bytes = pack(Collections.singletonList(4), null); byte[] bytes = pack(Collections.singletonList(4), null, 2);
DecodeState result = new DecodeState(); DecodeState result = new DecodeState();
decode(result, bytes, 0, bytes.length); decode(result, bytes, 0, bytes.length);
int val = (int)result.values.get(0); int val = ((Number)result.values.get(0)).intValue();
assert 4 == val; assert 4 == val;
} }
catch(Exception e) { catch(Exception e) {
@ -695,7 +788,7 @@ class TupleUtil {
} }
try { try {
byte[] bytes = pack(Collections.singletonList("\u021Aest \u0218tring"), null); byte[] bytes = pack(Collections.singletonList("\u021Aest \u0218tring"), null, 15);
DecodeState result = new DecodeState(); DecodeState result = new DecodeState();
decode(result, bytes, 0, bytes.length); decode(result, bytes, 0, bytes.length);
String string = (String)result.values.get(0); String string = (String)result.values.get(0);

View File

@ -412,7 +412,11 @@ public class AsyncStackTester {
return inst.popParams(listSize).thenAcceptAsync(rawElements -> { return inst.popParams(listSize).thenAcceptAsync(rawElements -> {
List<Tuple> tuples = new ArrayList<>(listSize); List<Tuple> tuples = new ArrayList<>(listSize);
for(Object o : rawElements) { for(Object o : rawElements) {
tuples.add(Tuple.fromBytes((byte[])o)); // Unpacking a tuple keeps around the serialized representation and uses
// it for comparison if it's available. To test semantic comparison, recreate
// the tuple from the item list.
Tuple t = Tuple.fromBytes((byte[])o);
tuples.add(Tuple.fromList(t.getItems()));
} }
Collections.sort(tuples); Collections.sort(tuples);
for(Tuple t : tuples) { for(Tuple t : tuples) {

View File

@ -368,9 +368,13 @@ public class StackTester {
else if (op == StackOperation.TUPLE_SORT) { else if (op == StackOperation.TUPLE_SORT) {
int listSize = StackUtils.getInt(inst.popParam().join()); int listSize = StackUtils.getInt(inst.popParam().join());
List<Object> rawElements = inst.popParams(listSize).join(); List<Object> rawElements = inst.popParams(listSize).join();
List<Tuple> tuples = new ArrayList<Tuple>(listSize); List<Tuple> tuples = new ArrayList<>(listSize);
for(Object o : rawElements) { for(Object o : rawElements) {
tuples.add(Tuple.fromBytes((byte[])o)); // Unpacking a tuple keeps around the serialized representation and uses
// it for comparison if it's available. To test semantic comparison, recreate
// the tuple from the item list.
Tuple t = Tuple.fromBytes((byte[])o);
tuples.add(Tuple.fromList(t.getItems()));
} }
Collections.sort(tuples); Collections.sort(tuples);
for(Tuple t : tuples) { for(Tuple t : tuples) {

View File

@ -16,7 +16,8 @@ public class TuplePerformanceTest {
private enum GeneratedTypes { private enum GeneratedTypes {
ALL, ALL,
LONG, LONG,
FLOATING_POINT FLOATING_POINT,
STRING_LIKE
} }
private final Random r; private final Random r;
@ -77,7 +78,7 @@ public class TuplePerformanceTest {
values.add(nested); values.add(nested);
} }
} }
return Tuple.fromItems(values); return Tuple.fromList(values);
} }
public Tuple createLongsTuple(int length) { public Tuple createLongsTuple(int length) {
@ -91,7 +92,7 @@ public class TuplePerformanceTest {
} }
values.add(val); values.add(val);
} }
return Tuple.fromItems(values); return Tuple.fromList(values);
} }
public Tuple createFloatingPointTuple(int length) { public Tuple createFloatingPointTuple(int length) {
@ -112,7 +113,41 @@ public class TuplePerformanceTest {
values.add(Double.longBitsToDouble(r.nextLong())); values.add(Double.longBitsToDouble(r.nextLong()));
} }
} }
return Tuple.fromItems(values); return Tuple.fromList(values);
}
public Tuple createStringLikeTuple(int length) {
List<Object> values = new ArrayList<>(length);
for(int i = 0; i < length; i++) {
double choice = r.nextDouble();
if(choice < 0.4) {
byte[] arr = new byte[r.nextInt(20)];
r.nextBytes(arr);
values.add(arr);
}
else if(choice < 0.8) {
// Random ASCII codepoints
int[] codepoints = new int[r.nextInt(20)];
for(int x = 0; x < codepoints.length; x++) {
codepoints[x] = r.nextInt(0x7F);
}
values.add(new String(codepoints, 0, codepoints.length));
}
else if(choice < 0.9) {
// All zeroes
byte[] zeroes = new byte[r.nextInt(20)];
values.add(zeroes);
}
else {
// Random Unicode codepoints
int[] codepoints = new int[r.nextInt(20)];
for(int x = 0; x < codepoints.length; x++) {
codepoints[x] = r.nextInt(0x10FFFF);
}
values.add(new String(codepoints, 0, codepoints.length));
}
}
return Tuple.fromList(values);
} }
public Tuple createTuple(int length) { public Tuple createTuple(int length) {
@ -123,6 +158,8 @@ public class TuplePerformanceTest {
return createLongsTuple(length); return createLongsTuple(length);
case FLOATING_POINT: case FLOATING_POINT:
return createFloatingPointTuple(length); return createFloatingPointTuple(length);
case STRING_LIKE:
return createStringLikeTuple(length);
default: default:
throw new IllegalStateException("unknown generated types " + generatedTypes); throw new IllegalStateException("unknown generated types " + generatedTypes);
} }
@ -143,6 +180,7 @@ public class TuplePerformanceTest {
long unpackNanos = 0L; long unpackNanos = 0L;
long equalsNanos = 0L; long equalsNanos = 0L;
long equalsArrayNanos = 0L; long equalsArrayNanos = 0L;
long sizeNanos = 0L;
long hashNanos = 0L; long hashNanos = 0L;
long secondHashNanos = 0L; long secondHashNanos = 0L;
long subspacePackNanos = 0L; long subspacePackNanos = 0L;
@ -150,6 +188,9 @@ public class TuplePerformanceTest {
long totalLength = 0L; long totalLength = 0L;
long totalBytes = 0L; long totalBytes = 0L;
for(int i = 0; i < iterations; i++) { for(int i = 0; i < iterations; i++) {
if(i % 100_000 == 0) {
System.out.println(" iteration " + i);
}
int length = r.nextInt(20); int length = r.nextInt(20);
Tuple t = createTuple(length); Tuple t = createTuple(length);
@ -157,8 +198,8 @@ public class TuplePerformanceTest {
byte[] serialized = t.pack(); byte[] serialized = t.pack();
long endNanos = System.nanoTime(); long endNanos = System.nanoTime();
packNanos += endNanos - startNanos; packNanos += endNanos - startNanos;
totalLength += length; totalLength += t.size();
totalBytes += serialized.length; totalBytes += t.getPackedSize();
startNanos = System.nanoTime(); startNanos = System.nanoTime();
Tuple t2 = Tuple.fromBytes(serialized); Tuple t2 = Tuple.fromBytes(serialized);
@ -182,6 +223,15 @@ public class TuplePerformanceTest {
endNanos = System.nanoTime(); endNanos = System.nanoTime();
equalsArrayNanos += endNanos - startNanos; equalsArrayNanos += endNanos - startNanos;
tCopy = Tuple.fromList(t.getItems());
startNanos = System.nanoTime();
int size = tCopy.getPackedSize();
endNanos = System.nanoTime();
if (size != t.pack().length) {
throw new RuntimeException("packed size did not match actual packed length: " + t + " -- " + " " + tCopy.getPackedSize() + " instead of " + t.getPackedSize());
}
sizeNanos += endNanos - startNanos;
startNanos = System.nanoTime(); startNanos = System.nanoTime();
byte[] subspacePacked = subspace.pack(t); byte[] subspacePacked = subspace.pack(t);
endNanos = System.nanoTime(); endNanos = System.nanoTime();
@ -229,6 +279,8 @@ public class TuplePerformanceTest {
System.out.printf(" Equals time per tuple: %f \u03BCs%n", equalsNanos * 1e-3 / iterations); System.out.printf(" Equals time per tuple: %f \u03BCs%n", equalsNanos * 1e-3 / iterations);
System.out.printf(" Equals time (using packed): %f s%n", equalsArrayNanos * 1e-9); System.out.printf(" Equals time (using packed): %f s%n", equalsArrayNanos * 1e-9);
System.out.printf(" Equals time (using packed) per tuple: %f \u03BCs%n", equalsArrayNanos * 1e-3 / iterations); System.out.printf(" Equals time (using packed) per tuple: %f \u03BCs%n", equalsArrayNanos * 1e-3 / iterations);
System.out.printf(" Size time: %f s%n", sizeNanos * 1e-9);
System.out.printf(" Size time per tuple: %f \u03BCs%n", sizeNanos * 1e-3 / iterations);
System.out.printf(" Subspace pack time: %f s%n", subspacePackNanos * 1e-9); System.out.printf(" Subspace pack time: %f s%n", subspacePackNanos * 1e-9);
System.out.printf(" Subspace pack time per tuple: %f \u03BCs%n", subspacePackNanos * 1e-3 / iterations); System.out.printf(" Subspace pack time per tuple: %f \u03BCs%n", subspacePackNanos * 1e-3 / iterations);
System.out.printf(" Subspace unpack time: %f s%n", subspaceUnpackNanos * 1e-9); System.out.printf(" Subspace unpack time: %f s%n", subspaceUnpackNanos * 1e-9);

View File

@ -20,10 +20,6 @@
package com.apple.foundationdb.test; package com.apple.foundationdb.test;
import com.apple.foundationdb.TransactionContext;
import com.apple.foundationdb.tuple.ByteArrayUtil;
import com.apple.foundationdb.tuple.Tuple;
import java.math.BigInteger; import java.math.BigInteger;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
@ -31,6 +27,11 @@ import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.UUID; import java.util.UUID;
import com.apple.foundationdb.TransactionContext;
import com.apple.foundationdb.tuple.ByteArrayUtil;
import com.apple.foundationdb.tuple.Tuple;
import com.apple.foundationdb.tuple.Versionstamp;
public class TupleTest { public class TupleTest {
private static final byte FF = (byte)0xff; private static final byte FF = (byte)0xff;
@ -40,6 +41,7 @@ public class TupleTest {
// FDB fdb = FDB.selectAPIVersion(610); // FDB fdb = FDB.selectAPIVersion(610);
serializedForms(); serializedForms();
comparisons(); comparisons();
replaceTests();
/* /*
try(Database db = fdb.open()) { try(Database db = fdb.open()) {
runTests(reps, db); runTests(reps, db);
@ -70,6 +72,7 @@ public class TupleTest {
private static void serializedForms() { private static void serializedForms() {
List<TupleSerialization> serializations = new ArrayList<>(); List<TupleSerialization> serializations = new ArrayList<>();
TupleSerialization.addAll(serializations, TupleSerialization.addAll(serializations,
Tuple.from(), new byte[0],
Tuple.from(0L), new byte[]{0x14}, Tuple.from(0L), new byte[]{0x14},
Tuple.from(BigInteger.ZERO), new byte[]{0x14}, Tuple.from(BigInteger.ZERO), new byte[]{0x14},
Tuple.from(1L), new byte[]{0x15, 0x01}, Tuple.from(1L), new byte[]{0x15, 0x01},
@ -116,6 +119,9 @@ public class TupleTest {
Tuple.from(Double.longBitsToDouble(Long.MAX_VALUE)), new byte[]{0x21, FF, FF, FF, FF, FF, FF, FF, FF}, Tuple.from(Double.longBitsToDouble(Long.MAX_VALUE)), new byte[]{0x21, FF, FF, FF, FF, FF, FF, FF, FF},
Tuple.from(Float.intBitsToFloat(~0)), new byte[]{0x20, 0x00, 0x00, 0x00, 0x00}, Tuple.from(Float.intBitsToFloat(~0)), new byte[]{0x20, 0x00, 0x00, 0x00, 0x00},
Tuple.from(Double.longBitsToDouble(~0L)), new byte[]{0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, Tuple.from(Double.longBitsToDouble(~0L)), new byte[]{0x21, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
Tuple.from((Object)new byte[0]), new byte[]{0x01, 0x00},
Tuple.from((Object)new byte[]{0x01, 0x02, 0x03}), new byte[]{0x01, 0x01, 0x02, 0x03, 0x00},
Tuple.from((Object)new byte[]{0x00, 0x00, 0x00, 0x04}), new byte[]{0x01, 0x00, FF, 0x00, FF, 0x00, FF, 0x04, 0x00},
Tuple.from(""), new byte[]{0x02, 0x00}, Tuple.from(""), new byte[]{0x02, 0x00},
Tuple.from("hello"), new byte[]{0x02, 'h', 'e', 'l', 'l', 'o', 0x00}, Tuple.from("hello"), new byte[]{0x02, 'h', 'e', 'l', 'l', 'o', 0x00},
Tuple.from("\u4e2d\u6587"), new byte[]{0x02, (byte)0xe4, (byte)0xb8, (byte)0xad, (byte)0xe6, (byte)0x96, (byte)0x87, 0x00}, Tuple.from("\u4e2d\u6587"), new byte[]{0x02, (byte)0xe4, (byte)0xb8, (byte)0xad, (byte)0xe6, (byte)0x96, (byte)0x87, 0x00},
@ -123,17 +129,42 @@ public class TupleTest {
Tuple.from(new String(new int[]{0x1f525}, 0, 1)), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0x94, (byte)0xa5, 0x00}, Tuple.from(new String(new int[]{0x1f525}, 0, 1)), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0x94, (byte)0xa5, 0x00},
Tuple.from("\ud83d\udd25"), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0x94, (byte)0xa5, 0x00}, Tuple.from("\ud83d\udd25"), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0x94, (byte)0xa5, 0x00},
Tuple.from("\ud83e\udd6f"), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00}, Tuple.from("\ud83e\udd6f"), new byte[]{0x02, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00},
Tuple.from("\ud83d"), new byte[]{0x02, 0x3f, 0x00},
Tuple.from("\udd25\ud83e\udd6f"), new byte[]{0x02, 0x3f, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00}, // malformed string - low surrogate without high surrogate Tuple.from("\udd25\ud83e\udd6f"), new byte[]{0x02, 0x3f, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00}, // malformed string - low surrogate without high surrogate
Tuple.from("a\udd25\ud83e\udd6f"), new byte[]{0x02, 'a', 0x3f, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00} // malformed string - low surrogate without high surrogate Tuple.from("a\udd25\ud83e\udd6f"), new byte[]{0x02, 'a', 0x3f, (byte)0xf0, (byte)0x9f, (byte)0xa5, (byte)0xaf, 0x00}, // malformed string - low surrogate without high surrogate
Tuple.from(Tuple.from((Object)null)), new byte[]{0x05, 0x00, FF, 0x00},
Tuple.from(Tuple.from(null, "hello")), new byte[]{0x05, 0x00, FF, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x00},
Tuple.from(Arrays.asList(null, "hello")), new byte[]{0x05, 0x00, FF, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x00},
Tuple.from(Tuple.from(null, "hell\0")), new byte[]{0x05, 0x00, FF, 0x02, 'h', 'e', 'l', 'l', 0x00, FF, 0x00, 0x00},
Tuple.from(Arrays.asList(null, "hell\0")), new byte[]{0x05, 0x00, FF, 0x02, 'h', 'e', 'l', 'l', 0x00, FF, 0x00, 0x00},
Tuple.from(Tuple.from((Object)null), "hello"), new byte[]{0x05, 0x00, FF, 0x00, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00},
Tuple.from(Tuple.from((Object)null), "hello", new byte[]{0x01, 0x00}, new byte[0]), new byte[]{0x05, 0x00, FF, 0x00, 0x02, 'h', 'e', 'l', 'l', 'o', 0x00, 0x01, 0x01, 0x00, FF, 0x00, 0x01, 0x00},
Tuple.from(new UUID(0xba5eba11, 0x5ca1ab1e)), new byte[]{0x30, FF, FF, FF, FF, (byte)0xba, 0x5e, (byte)0xba, 0x11, 0x00, 0x00, 0x00, 0x00, 0x5c, (byte)0xa1, (byte)0xab, 0x1e},
Tuple.from(false), new byte[]{0x26},
Tuple.from(true), new byte[]{0x27},
Tuple.from((short)0x3019), new byte[]{0x16, 0x30, 0x19},
Tuple.from((byte)0x03), new byte[]{0x15, 0x03},
Tuple.from(Versionstamp.complete(new byte[]{(byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03})), new byte[]{0x33, (byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03, 0x00, 0x00},
Tuple.from(Versionstamp.complete(new byte[]{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a}, 657)), new byte[]{0x33, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x02, (byte)0x91}
); );
Tuple bigTuple = new Tuple();
List<byte[]> serializedForms = new ArrayList<>();
for(TupleSerialization serialization : serializations) {
bigTuple = bigTuple.addAll(serialization.tuple);
serializedForms.add(serialization.serialization);
}
serializations.add(new TupleSerialization(bigTuple, ByteArrayUtil.join(null, serializedForms)));
for(TupleSerialization serialization : serializations) { for(TupleSerialization serialization : serializations) {
System.out.println("Packing " + serialization.tuple + " (expecting: " + ByteArrayUtil.printable(serialization.serialization) + ")"); System.out.println("Packing " + serialization.tuple + " (expecting: " + ByteArrayUtil.printable(serialization.serialization) + ")");
if(serialization.tuple.getPackedSize() != serialization.serialization.length) {
throw new RuntimeException("Tuple " + serialization.tuple + " packed size " + serialization.tuple.getPackedSize() + " does not match expected packed size " + serialization.serialization.length);
}
if(!Arrays.equals(serialization.tuple.pack(), serialization.serialization)) { if(!Arrays.equals(serialization.tuple.pack(), serialization.serialization)) {
throw new RuntimeException("Tuple " + serialization.tuple + " has serialization " + ByteArrayUtil.printable(serialization.tuple.pack()) + throw new RuntimeException("Tuple " + serialization.tuple + " has serialization " + ByteArrayUtil.printable(serialization.tuple.pack()) +
" which does not match expected serialization " + ByteArrayUtil.printable(serialization.serialization)); " which does not match expected serialization " + ByteArrayUtil.printable(serialization.serialization));
} }
if(!Objects.equals(serialization.tuple, Tuple.fromBytes(serialization.serialization))) { if(!Objects.equals(serialization.tuple, Tuple.fromItems(Tuple.fromBytes(serialization.serialization).getItems()))) {
throw new RuntimeException("Tuple " + serialization.tuple + " does not match deserialization " + Tuple.fromBytes(serialization.serialization) + throw new RuntimeException("Tuple " + serialization.tuple + " does not match deserialization " + Tuple.fromBytes(serialization.serialization) +
" which comes from serialization " + ByteArrayUtil.printable(serialization.serialization)); " which comes from serialization " + ByteArrayUtil.printable(serialization.serialization));
} }
@ -176,6 +207,16 @@ public class TupleTest {
Tuple.from((Object)new byte[]{0x00, FF}), Tuple.from((Object)new byte[]{0x00, FF}),
Tuple.from((Object)new byte[]{0x7f}), Tuple.from((Object)new byte[]{0x7f}),
Tuple.from((Object)new byte[]{(byte)0x80}), Tuple.from((Object)new byte[]{(byte)0x80}),
Tuple.from(null, new byte[0]),
Tuple.from(null, new byte[]{0x00}),
Tuple.from(null, new byte[]{0x00, FF}),
Tuple.from(null, new byte[]{0x7f}),
Tuple.from(null, new byte[]{(byte)0x80}),
Tuple.from(Tuple.from(null, new byte[0])),
Tuple.from(Tuple.from(null, new byte[]{0x00})),
Tuple.from(Tuple.from(null, new byte[]{0x00, FF})),
Tuple.from(Tuple.from(null, new byte[]{0x7f})),
Tuple.from(Tuple.from(null, new byte[]{(byte)0x80})),
Tuple.from("a"), Tuple.from("a"),
Tuple.from("\u03bc\u03ac\u03b8\u03b7\u03bc\u03b1"), Tuple.from("\u03bc\u03ac\u03b8\u03b7\u03bc\u03b1"),
Tuple.from("\u03bc\u03b1\u0301\u03b8\u03b7\u03bc\u03b1"), Tuple.from("\u03bc\u03b1\u0301\u03b8\u03b7\u03bc\u03b1"),
@ -195,7 +236,18 @@ public class TupleTest {
Tuple.from(new UUID(-1, 0)), Tuple.from(new UUID(-1, 0)),
Tuple.from(new UUID(-1, -1)), Tuple.from(new UUID(-1, -1)),
Tuple.from(new UUID(1, -1)), Tuple.from(new UUID(1, -1)),
Tuple.from(new UUID(1, 1)) Tuple.from(new UUID(1, 1)),
Tuple.from(false),
Tuple.from(true),
Tuple.from(Arrays.asList(0, 1, 2)),
Tuple.from(Arrays.asList(0, 1), "hello"),
Tuple.from(Arrays.asList(0, 1), "help"),
Tuple.from(Versionstamp.complete(new byte[]{0x0a, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03})),
Tuple.from(Versionstamp.complete(new byte[]{(byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03})),
Tuple.from(Versionstamp.complete(new byte[]{(byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03}, 1)),
Tuple.from(Versionstamp.complete(new byte[]{(byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03}, 0xa101)),
Tuple.from(Versionstamp.complete(new byte[]{(byte)0xaa, (byte)0xbb, (byte)0xcc, (byte)0xdd, (byte)0xee, FF, 0x00, 0x01, 0x02, 0x03}, 65535))
); );
for(Tuple t1 : tuples) { for(Tuple t1 : tuples) {
@ -209,6 +261,47 @@ public class TupleTest {
if(Integer.signum(semanticComparison) != Integer.signum(byteComparison)) { if(Integer.signum(semanticComparison) != Integer.signum(byteComparison)) {
throw new RuntimeException("Tuple t1 and t2 comparison mismatched: semantic = " + semanticComparison + " while byte order = " + byteComparison); throw new RuntimeException("Tuple t1 and t2 comparison mismatched: semantic = " + semanticComparison + " while byte order = " + byteComparison);
} }
int implicitByteComparison = t1.compareTo(t2);
if(Integer.signum(semanticComparison) != Integer.signum(implicitByteComparison)) {
throw new RuntimeException("Tuple t1 and t2 comparison mismatched: semantic = " + semanticComparison + " while implicit byte order = " + implicitByteComparison);
}
}
}
}
// These should be in ArrayUtilTest, but those can't be run at the moment, so here they go.
private static void replaceTests() {
List<byte[]> arrays = Arrays.asList(
new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[]{0x01, 0x02}, new byte[]{0x03, 0x04}, new byte[]{0x03, 0x04, 0x03, 0x04},
new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[]{0x01, 0x02}, new byte[]{0x03}, new byte[]{0x03, 0x03},
new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[]{0x01, 0x02}, new byte[]{0x03, 0x04, 0x05}, new byte[]{0x03, 0x04, 0x05, 0x03, 0x04, 0x05},
new byte[]{0x00, 0x01, 0x02, 0x00, 0x01, 0x02, 0x00}, new byte[]{0x01, 0x02}, new byte[]{0x03, 0x04, 0x05}, new byte[]{0x00, 0x03, 0x04, 0x05, 0x00, 0x03, 0x04, 0x05, 0x00},
new byte[]{0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x02}, new byte[]{0x03, 0x04}, new byte[]{0x01, 0x01, 0x01, 0x01},
new byte[]{0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x02}, new byte[]{0x03}, new byte[]{0x01, 0x01, 0x01, 0x01},
new byte[]{0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x02}, new byte[]{0x03, 0x04, 0x05}, new byte[]{0x01, 0x01, 0x01, 0x01},
new byte[]{0x01, 0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x01}, new byte[]{0x03, 0x04, 0x05}, new byte[]{0x03, 0x04, 0x05, 0x03, 0x04, 0x05, 0x01},
new byte[]{0x01, 0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x01}, new byte[]{0x03, 0x04}, new byte[]{0x03, 0x04, 0x03, 0x04, 0x01},
new byte[]{0x01, 0x01, 0x01, 0x01, 0x01}, new byte[]{0x01, 0x01}, new byte[]{0x03}, new byte[]{0x03, 0x03, 0x01},
new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[]{0x01, 0x02}, null, new byte[0],
new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[]{0x01, 0x02}, new byte[0], new byte[0],
new byte[]{0x01, 0x02, 0x01, 0x02}, null, new byte[]{0x04}, new byte[]{0x01, 0x02, 0x01, 0x02},
new byte[]{0x01, 0x02, 0x01, 0x02}, new byte[0], new byte[]{0x04}, new byte[]{0x01, 0x02, 0x01, 0x02},
null, new byte[]{0x01, 0x02}, new byte[]{0x04}, null
);
for(int i = 0; i < arrays.size(); i += 4) {
byte[] src = arrays.get(i);
byte[] pattern = arrays.get(i + 1);
byte[] replacement = arrays.get(i + 2);
byte[] expectedResults = arrays.get(i + 3);
byte[] results = ByteArrayUtil.replace(src, pattern, replacement);
if(!Arrays.equals(results, expectedResults)) {
throw new RuntimeException("results " + ByteArrayUtil.printable(results) + " did not match expected results " +
ByteArrayUtil.printable(expectedResults) + " when replacing " + ByteArrayUtil.printable(pattern) +
" with " + ByteArrayUtil.printable(replacement) + " in " + ByteArrayUtil.printable(src));
}
if(src != null && src == results) {
throw new RuntimeException("src and results array are pointer-equal when replacing " + ByteArrayUtil.printable(pattern) +
" with " + ByteArrayUtil.printable(replacement) + " in " + ByteArrayUtil.printable(src));
} }
} }
} }