Merge pull request #915 from alecgrieser/feature-bigint-support-go-ruby-aj

Support integers up to 255 bytes in Go and Ruby
2018-11-13 11:56:31 -08:00 · 2018-11-13 11:56:31 -08:00 · 4afccb4811
parent 326008f2e7 8424fc57ec
commit 4afccb4811
8 changed files with 224 additions and 56 deletions
--- a/bindings/bindingtester/known_testers.py
+++ b/bindings/bindingtester/known_testers.py
@ -59,9 +59,9 @@ _java_cmd = 'java -ea -cp %s:%s com.apple.foundationdb.test.' % (
 testers = {
    'python': Tester('python', 'python ' + _absolute_path('python/tests/tester.py'), 2040, 23, MAX_API_VERSION, types=ALL_TYPES),
    'python3': Tester('python3', 'python3 ' + _absolute_path('python/tests/tester.py'), 2040, 23, MAX_API_VERSION, types=ALL_TYPES),
-    'ruby': Tester('ruby', _absolute_path('ruby/tests/tester.rb'), 64, 23, MAX_API_VERSION),
+    'ruby': Tester('ruby', _absolute_path('ruby/tests/tester.rb'), 2040, 23, MAX_API_VERSION),
    'java': Tester('java', _java_cmd + 'StackTester', 2040, 510, MAX_API_VERSION, types=ALL_TYPES),
    'java_async': Tester('java', _java_cmd + 'AsyncStackTester', 2040, 510, MAX_API_VERSION, types=ALL_TYPES),
-    'go': Tester('go', _absolute_path('go/build/bin/_stacktester'), 63, 200, MAX_API_VERSION),
+    'go': Tester('go', _absolute_path('go/build/bin/_stacktester'), 2040, 200, MAX_API_VERSION),
    'flow': Tester('flow', _absolute_path('flow/bin/fdb_flow_tester'), 63, 500, MAX_API_VERSION, directory_snapshot_ops_enabled=False),
 }
--- a/bindings/go/src/_stacktester/stacktester.go
+++ b/bindings/go/src/_stacktester/stacktester.go
@ -28,6 +28,7 @@ import (
 	"github.com/apple/foundationdb/bindings/go/src/fdb"
 	"github.com/apple/foundationdb/bindings/go/src/fdb/tuple"
 	"log"
+	"math/big"
 	"os"
 	"reflect"
 	"runtime"
@ -103,7 +104,7 @@ func (sm *StackMachine) waitAndPop() (ret stackEntry) {
 	switch el := ret.item.(type) {
 	case []byte:
 		ret.item = el
-	case int64, string, bool, tuple.UUID, float32, float64, tuple.Tuple:
+	case int64, uint64, *big.Int, string, bool, tuple.UUID, float32, float64, tuple.Tuple:
 		ret.item = el
 	case fdb.Key:
 		ret.item = []byte(el)
@ -174,8 +175,10 @@ func tupleToString(t tuple.Tuple) string {
 			buffer.WriteString(", ")
 		}
 		switch el := el.(type) {
-		case int64:
+		case int64, uint64:
 			buffer.WriteString(fmt.Sprintf("%d", el))
+		case *big.Int:
+			buffer.WriteString(fmt.Sprintf("%s", el))
 		case []byte:
 			buffer.WriteString(fmt.Sprintf("%+q", string(el)))
 		case string:
@ -184,9 +187,7 @@ func tupleToString(t tuple.Tuple) string {
 			buffer.WriteString(fmt.Sprintf("%t", el))
 		case tuple.UUID:
 			buffer.WriteString(hex.EncodeToString(el[:]))
-		case float32:
-			buffer.WriteString(fmt.Sprintf("%f", el))
-		case float64:
+		case float32, float64:
 			buffer.WriteString(fmt.Sprintf("%f", el))
 		case nil:
 			buffer.WriteString("nil")
@ -205,8 +206,10 @@ func (sm *StackMachine) dumpStack() {
 		fmt.Printf(" %d.", sm.stack[i].idx)
 		el := sm.stack[i].item
 		switch el := el.(type) {
-		case int64:
+		case int64, uint64:
 			fmt.Printf(" %d", el)
+		case *big.Int:
+			fmt.Printf(" %s", el)
 		case fdb.FutureNil:
 			fmt.Printf(" FutureNil")
 		case fdb.FutureByteSlice:
@ -225,9 +228,7 @@ func (sm *StackMachine) dumpStack() {
 			fmt.Printf(" %s", tupleToString(el))
 		case tuple.UUID:
 			fmt.Printf(" %s", hex.EncodeToString(el[:]))
-		case float32:
-			fmt.Printf(" %f", el)
-		case float64:
+		case float32, float64:
 			fmt.Printf(" %f", el)
 		case nil:
 			fmt.Printf(" nil")
@ -490,7 +491,27 @@ func (sm *StackMachine) processInst(idx int, inst tuple.Tuple) {
 	case op == "POP":
 		sm.stack = sm.stack[:len(sm.stack)-1]
 	case op == "SUB":
-		sm.store(idx, sm.waitAndPop().item.(int64)-sm.waitAndPop().item.(int64))
+		var x, y *big.Int
+		switch x1 := sm.waitAndPop().item.(type) {
+		case *big.Int:
+			x = x1
+		case int64:
+			x = big.NewInt(x1)
+		case uint64:
+			x = new(big.Int)
+			x.SetUint64(x1)
+		}
+		switch y1 := sm.waitAndPop().item.(type) {
+		case *big.Int:
+			y = y1
+		case int64:
+			y = big.NewInt(y1)
+		case uint64:
+			y = new(big.Int)
+			y.SetUint64(y1)
+		}
+
+		sm.store(idx, x.Sub(x, y))
 	case op == "CONCAT":
 		str1 := sm.waitAndPop().item
 		str2 := sm.waitAndPop().item
--- a/bindings/go/src/fdb/tuple/tuple.go
+++ b/bindings/go/src/fdb/tuple/tuple.go
@ -30,9 +30,10 @@
 // (https://apple.github.io/foundationdb/data-modeling.html#tuples).
 //
 // FoundationDB tuples can currently encode byte and unicode strings, integers,
-// floats, doubles, booleans, UUIDs, tuples, and NULL values. In Go these are
-// represented as []byte (or fdb.KeyConvertible), string, int64 (or int),
-// float32, float64, bool, UUID, Tuple, and nil.
+// large integers, floats, doubles, booleans, UUIDs, tuples, and NULL values.
+// In Go these are represented as []byte (or fdb.KeyConvertible), string, int64
+// (or int, uint, uint64), *big.Int (or big.Int), float32, float64, bool,
+// UUID, Tuple, and nil.
 package tuple

 import (
@ -40,6 +41,7 @@ import (
 	"encoding/binary"
 	"fmt"
 	"math"
+	"math/big"

 	"github.com/apple/foundationdb/bindings/go/src/fdb"
 )
@ -50,7 +52,8 @@ import (
 // result in a runtime panic).
 //
 // The valid types for TupleElement are []byte (or fdb.KeyConvertible), string,
-// int64 (or int), float, double, bool, UUID, Tuple, and nil.
+// int64 (or int, uint, uint64), *big.Int (or big.Int), float32, float64, bool,
+// UUID, Tuple, and nil.
 type TupleElement interface{}

 // Tuple is a slice of objects that can be encoded as FoundationDB tuples. If
@ -59,7 +62,7 @@ type TupleElement interface{}
 //
 // Given a Tuple T containing objects only of these types, then T will be
 // identical to the Tuple returned by unpacking the byte slice obtained by
-// packing T (modulo type normalization to []byte and int64).
+// packing T (modulo type normalization to []byte, int64, uint64, and *big.Int).
 type Tuple []TupleElement

 // UUID wraps a basic byte array as a UUID. We do not provide any special
@ -76,8 +79,8 @@ const bytesCode = 0x01
 const stringCode = 0x02
 const nestedCode = 0x05
 const intZeroCode = 0x14
-const posIntEnd = 0x1c
-const negIntStart = 0x0c
+const posIntEnd = 0x1d
+const negIntStart = 0x0b
 const floatCode = 0x20
 const doubleCode = 0x21
 const falseCode = 0x26
@ -96,6 +99,8 @@ var sizeLimits = []uint64{
 	1<<(8*8) - 1,
 }

+var minInt64BigInt = big.NewInt(math.MinInt64)
+
 func bisectLeft(u uint64) int {
 	var n int
 	for sizeLimits[n] < u {
@ -148,30 +153,79 @@ func (p *packer) encodeBytes(code byte, b []byte) {
 	p.putByte(0x00)
 }

-func (p *packer) encodeInt(i int64) {
+func (p *packer) encodeUint(i uint64) {
 	if i == 0 {
-		p.putByte(0x14)
+		p.putByte(intZeroCode)
 		return
 	}

-	var n int
+	n := bisectLeft(i)
 	var scratch [8]byte

-	switch {
-	case i > 0:
-		n = bisectLeft(uint64(i))
-		p.putByte(byte(intZeroCode + n))
-		binary.BigEndian.PutUint64(scratch[:], uint64(i))
-	case i < 0:
-		n = bisectLeft(uint64(-i))
-		p.putByte(byte(0x14 - n))
-		offsetEncoded := int64(sizeLimits[n]) + i
-		binary.BigEndian.PutUint64(scratch[:], uint64(offsetEncoded))
-	}
+	p.putByte(byte(intZeroCode + n))
+	binary.BigEndian.PutUint64(scratch[:], i)

 	p.putBytes(scratch[8-n:])
 }

+func (p *packer) encodeInt(i int64) {
+	if i >= 0 {
+		p.encodeUint(uint64(i))
+		return
+	}
+
+	n := bisectLeft(uint64(-i))
+	var scratch [8]byte
+
+	p.putByte(byte(intZeroCode - n))
+	offsetEncoded := int64(sizeLimits[n]) + i
+	binary.BigEndian.PutUint64(scratch[:], uint64(offsetEncoded))
+
+	p.putBytes(scratch[8-n:])
+}
+
+func (p *packer) encodeBigInt(i *big.Int) {
+	length := len(i.Bytes())
+	if length > 0xff {
+		panic(fmt.Sprintf("Integer magnitude is too large (more than 255 bytes)"))
+	}
+
+	if i.Sign() >= 0 {
+		intBytes := i.Bytes()
+		if length > 8 {
+			p.putByte(byte(posIntEnd))
+			p.putByte(byte(len(intBytes)))
+		} else {
+			p.putByte(byte(intZeroCode + length))
+		}
+
+		p.putBytes(intBytes)
+	} else {
+		add := new(big.Int).Lsh(big.NewInt(1), uint(length*8))
+		add.Sub(add, big.NewInt(1))
+		transformed := new(big.Int)
+		transformed.Add(i, add)
+
+		intBytes := transformed.Bytes()
+		if length > 8 {
+			p.putByte(byte(negIntStart))
+			p.putByte(byte(length ^ 0xff))
+		} else {
+			p.putByte(byte(intZeroCode - length))
+		}
+
+		// For large negative numbers whose absolute value begins with 0xff bytes,
+		// the transformed bytes may begin with 0x00 bytes. However, intBytes
+		// will only contain the non-zero suffix, so this loop is needed to make
+		// the value written be the correct length.
+		for i := len(intBytes); i < length; i++ {
+			p.putByte(0x00)
+		}
+
+		p.putBytes(intBytes)
+	}
+}
+
 func (p *packer) encodeFloat(f float32) {
 	var scratch [4]byte
 	binary.BigEndian.PutUint32(scratch[:], math.Float32bits(f))
@ -209,10 +263,18 @@ func (p *packer) encodeTuple(t Tuple, nested bool) {
 			if nested {
 				p.putByte(0xff)
 			}
-		case int64:
-			p.encodeInt(e)
 		case int:
 			p.encodeInt(int64(e))
+		case int64:
+			p.encodeInt(e)
+		case uint:
+			p.encodeUint(uint64(e))
+		case uint64:
+			p.encodeUint(e)
+		case *big.Int:
+			p.encodeBigInt(e)
+		case big.Int:
+			p.encodeBigInt(&e)
 		case []byte:
 			p.encodeBytes(bytesCode, e)
 		case fdb.KeyConvertible:
@ -243,8 +305,10 @@ func (p *packer) encodeTuple(t Tuple, nested bool) {

 // Pack returns a new byte slice encoding the provided tuple. Pack will panic if
 // the tuple contains an element of any type other than []byte,
-// fdb.KeyConvertible, string, int64, int, float32, float64, bool, tuple.UUID,
-// nil, or a Tuple with elements of valid types.
+// fdb.KeyConvertible, string, int64, int, uint64, uint, *big.Int, big.Int, float32,
+// float64, bool, tuple.UUID, nil, or a Tuple with elements of valid types. It will
+// also panic if an integer is specified with a value outside the range
+// [-2**2040+1, 2**2040-1].
 //
 // Tuple satisfies the fdb.KeyConvertible interface, so it is not necessary to
 // call Pack when using a Tuple with a FoundationDB API function that requires a
@ -282,9 +346,9 @@ func decodeString(b []byte) (string, int) {
 	return string(bp), idx
 }

-func decodeInt(b []byte) (int64, int) {
+func decodeInt(b []byte) (interface{}, int) {
 	if b[0] == intZeroCode {
-		return 0, 1
+		return int64(0), 1
 	}

 	var neg bool
@ -299,14 +363,55 @@ func decodeInt(b []byte) (int64, int) {
 	copy(bp[8-n:], b[1:n+1])

 	var ret int64
-
 	binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret)

 	if neg {
-		ret -= int64(sizeLimits[n])
+		return ret - int64(sizeLimits[n]), n + 1
 	}

-	return ret, n + 1
+	if ret > 0 {
+		return ret, n + 1
+	}
+
+	// The encoded value claimed to be positive yet when put in an int64
+	// produced a negative value. This means that the number must be a positive
+	// 64-bit value that uses the most significant bit. This can be fit in a
+	// uint64, so return that. Note that this is the *only* time we return
+	// a uint64.
+	return uint64(ret), n + 1
+}
+
+func decodeBigInt(b []byte) (interface{}, int) {
+	val := new(big.Int)
+	offset := 1
+	var length int
+
+	if b[0] == negIntStart || b[0] == posIntEnd {
+		length = int(b[1])
+		if b[0] == negIntStart {
+			length ^= 0xff
+		}
+
+		offset += 1
+	} else {
+		// Must be a negative 8 byte integer
+		length = 8
+	}
+
+	val.SetBytes(b[offset : length+offset])
+
+	if b[0] < intZeroCode {
+		sub := new(big.Int).Lsh(big.NewInt(1), uint(length)*8)
+		sub.Sub(sub, big.NewInt(1))
+		val.Sub(val, sub)
+	}
+
+	// This is the only value that fits in an int64 or uint64 that is decoded with this function
+	if val.Cmp(minInt64BigInt) == 0 {
+		return val.Int64(), length + offset
+	}
+
+	return val, length + offset
 }

 func decodeFloat(b []byte) (float32, int) {
@ -357,8 +462,12 @@ func decodeTuple(b []byte, nested bool) (Tuple, int, error) {
 			el, off = decodeBytes(b[i:])
 		case b[i] == stringCode:
 			el, off = decodeString(b[i:])
-		case negIntStart <= b[i] && b[i] <= posIntEnd:
+		case negIntStart+1 < b[i] && b[i] < posIntEnd:
 			el, off = decodeInt(b[i:])
+		case negIntStart+1 == b[i] && (b[i+1]&0x80 != 0):
+			el, off = decodeInt(b[i:])
+		case negIntStart <= b[i] && b[i] <= posIntEnd:
+			el, off = decodeBigInt(b[i:])
 		case b[i] == floatCode:
 			if i+5 > len(b) {
 				return nil, i, fmt.Errorf("insufficient bytes to decode float starting at position %d of byte array for tuple", i)
--- a/bindings/ruby/lib/fdbimpl.rb
+++ b/bindings/ruby/lib/fdbimpl.rb
@ -170,7 +170,7 @@ module FDB
          Proc.new do || @setfunc.call(v[0], nil) end
        when String then
          Proc.new do |opt=nil| @setfunc.call(v[0], (opt.nil? ? opt : opt.encode('UTF-8')) ) end
-        when Fixnum then
+        when Integer then
          Proc.new do |opt| @setfunc.call(v[0], [opt].pack("q<")) end
        else
          raise ArgumentError, "Don't know how to set options of type #{v[2].class}"
--- a/bindings/ruby/lib/fdbtuple.rb
+++ b/bindings/ruby/lib/fdbtuple.rb
@ -35,8 +35,8 @@ module FDB
    @@STRING_CODE     = 0x02
    @@NESTED_CODE     = 0x05
    @@INT_ZERO_CODE   = 0x14
-    @@POS_INT_END     = 0x1c
-    @@NEG_INT_START   = 0x0c
+    @@POS_INT_END     = 0x1d
+    @@NEG_INT_START   = 0x0b
    @@FLOAT_CODE      = 0x20
    @@DOUBLE_CODE     = 0x21
    @@FALSE_CODE      = 0x26
@ -117,12 +117,28 @@ module FDB
      elsif code == @@STRING_CODE
        epos = find_terminator(v, pos+1)
        [v.slice(pos+1, epos-pos-1).gsub("\x00\xFF", "\x00").force_encoding("UTF-8"), epos+1]
-      elsif code >= @@INT_ZERO_CODE && code <= @@POS_INT_END
+      elsif code >= @@INT_ZERO_CODE && code < @@POS_INT_END
        n = code - @@INT_ZERO_CODE
        [("\x00" * (8-n) + v.slice(pos+1, n)).unpack("Q>")[0], pos+n+1]
-      elsif code >= @@NEG_INT_START and code < @@INT_ZERO_CODE
+      elsif code > @@NEG_INT_START and code < @@INT_ZERO_CODE
        n = @@INT_ZERO_CODE - code
        [("\x00" * (8-n) + v.slice(pos+1, n)).unpack("Q>")[0]-@@size_limits[n], pos+n+1]
+      elsif code == @@POS_INT_END
+        length = v.getbyte(pos+1)
+        val = 0
+        length.times do |i|
+          val = val << 8
+          val += v.getbyte(pos+2+i)
+        end
+        [val, pos+length+2]
+      elsif code == @@NEG_INT_START
+        length = v.getbyte(pos+1) ^ 0xff
+        val = 0
+        length.times do |i|
+          val = val << 8
+          val += v.getbyte(pos+2+i)
+        end
+        [val - (1 << (length*8)) + 1, pos+length+2]
      elsif code == @@FALSE_CODE
        [false, pos+1]
      elsif code == @@TRUE_CODE
@ -182,15 +198,34 @@ module FDB
          raise ArgumentError, "unsupported encoding #{v.encoding.name}"
        end
      elsif v.kind_of? Integer
-        raise RangeError, "value outside inclusive range -2**64+1 to 2**64-1" if v < -2**64+1 || v > 2**64-1
+        raise RangeError, "Integer magnitude is too large (more than 255 bytes)" if v < -2**2040+1 || v > 2**2040-1
        if v == 0
          @@INT_ZERO_CODE.chr
        elsif v > 0
-          n = bisect_left( @@size_limits, v )
-          (20+n).chr + [v].pack("Q>").slice(8-n, n)
+          if v > @@size_limits[-1]
+            length = (v.bit_length + 7) / 8
+            result = @@POS_INT_END.chr + length.chr
+            length.times do |i|
+              result << ((v >> (8 * (length-i-1))) & 0xff)
+            end
+            result
+          else
+            n = bisect_left( @@size_limits, v )
+            (@@INT_ZERO_CODE+n).chr + [v].pack("Q>").slice(8-n, n)
+          end
        else
-          n = bisect_left( @@size_limits, -v )
-          (20-n).chr + [@@size_limits[n]+v].pack("Q>").slice(8-n, n)
+          if -v > @@size_limits[-1]
+            length = ((-v).bit_length + 7) / 8
+            v += (1 << (length * 8)) - 1
+            result = @@NEG_INT_START.chr + (length ^ 0xff).chr
+            length.times do |i|
+              result << ((v >> (8 * (length-i-1))) & 0xff)
+            end
+            result
+          else
+            n = bisect_left( @@size_limits, -v )
+            (@@INT_ZERO_CODE-n).chr + [@@size_limits[n]+v].pack("Q>").slice(8-n, n)
+          end
        end
      elsif v.kind_of? TrueClass
        @@TRUE_CODE.chr
--- a/bindings/ruby/tests/tester.rb
+++ b/bindings/ruby/tests/tester.rb
--- a/documentation/sphinx/source/api-ruby.rst
+++ b/documentation/sphinx/source/api-ruby.rst
@ -983,8 +983,8 @@ In the FoundationDB Ruby API, a tuple is an :class:`Enumerable` of elements of t
 | Unicode string       | Any value ``v`` where ``v.kind_of? String == true`` and ``v.encoding`` is   | ``String`` with encoding ``Encoding::UTF_8``                                 |
 |                      | ``Encoding::UTF_8``                                                         |                                                                              |
 +----------------------+-----------------------------------------------------------------------------+------------------------------------------------------------------------------+
-| 64-bit signed integer| Any value ``v`` where ``v.kind_of? Integer == true`` and ``-2**64+1 <= v <= | ``Fixnum`` or ``Bignum`` (depending on the magnitude of the value)           |
-|                      | 2**64-1``                                                                   |                                                                              |
+| Integer              | Any value ``v`` where ``v.kind_of? Integer == true`` and                    | ``Integer``                                                                  |
+|                      | ``-2**2040+1 <= v <= 2**2040-1``                                            |                                                                              |
 +----------------------+-----------------------------------------------------------------------------+------------------------------------------------------------------------------+
 | Floating point number| Any value ``v`` where ``v.kind_of? FDB::Tuple::SingleFloat`` where          | :class:`FDB::Tuple::SingleFloat`                                             |
 | (single-precision)   | ``v.value.kind_of? Float`` and ``v.value`` fits inside an IEEE 754 32-bit   |                                                                              |
--- a/documentation/sphinx/source/release-notes.rst
+++ b/documentation/sphinx/source/release-notes.rst
@ -98,10 +98,13 @@ Bindings
 * C API calls made on the network thread could be reordered with calls made from other threads. [6.0.2] `(Issue #518) <https://github.com/apple/foundationdb/issues/518>`_
 * The TLS_PLUGIN option is now a no-op and has been deprecated. [6.0.10] `(PR #710) <https://github.com/apple/foundationdb/pull/710>`_
 * Java: the `Versionstamp::getUserVersion() </javadoc/com/apple/foundationdb/tuple/Versionstamp.html#getUserVersion-->`_ method did not handle user versions greater than ``0x00FF`` due to operator precedence errors. [6.0.11] `(Issue #761) <https://github.com/apple/foundationdb/issues/761>`_
+* Python: bindings didn't work with Python 3.7 because of the new ``async`` keyword. [6.0.13] `(Issue #830) <https://github.com/apple/foundationdb/issues/830>`_
+* Go: ``PrefixRange`` didn't correctly return an error if it failed to generate the range. [6.0.15] `(PR #878) <https://github.com/apple/foundationdb/pull/878>`_
+* Go: Add Tuple layer support for ``uint``, ``uint64``, and ``*big.Int`` integers up to 255 bytes. Integer values will be decoded into the first of ``int64``, ``uint64``, or ``*big.Int`` in which they fit. [6.0.15] `(PR #915) <https://github.com/apple/foundationdb/pull/915>`_
+* Ruby: Add Tuple layer support for integers up to 255 bytes. [6.0.15] `(PR #915) <https://github.com/apple/foundationdb/pull/915>`_
 * Python: bindings didn't work with Python 3.7 because of the new ``async`` keyword. [6.0.13] `(Issue #830) <https://github.com/apple/foundationdb/issues/830>`_
 * Go: ``PrefixRange`` didn't correctly return an error if it failed to generate the range. [6.0.15] `(PR #878) <https://github.com/apple/foundationdb/pull/878>`_

-
 Other Changes
 -------------