# (source listing metadata: 551 lines, 16 KiB)
# vector.py
# This source file is part of the FoundationDB open source project
# Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""FoundationDB Vector Layer.
Provides the Vector() class for storing and manipulating arrays
in FoundationDB.
"""
import fdb
import fdb.tuple
import threading
# This defines a Subspace of keys #
class Subspace(object):
    """A key prefix under which tuple-encoded keys are built.

    The prefix is ``rawPrefix`` followed by the tuple encoding of
    ``prefixTuple``. Every key produced by ``pack`` and ``range`` starts
    with that prefix.
    """

    def __init__(self, prefixTuple, rawPrefix=b""):
        """Build the prefix from ``rawPrefix`` plus the packed ``prefixTuple``.

        The default raw prefix is an empty byte string so that it can be
        concatenated with the packed tuple on Python 3 as well as Python 2
        (where ``b""`` and ``""`` are the same object type).
        """
        self.rawPrefix = rawPrefix + fdb.tuple.pack(prefixTuple)

    def __getitem__(self, name):
        """Return the nested Subspace whose prefix is extended by ``name``."""
        return Subspace((name,), self.rawPrefix)

    def key(self):
        """Return the raw prefix of this subspace."""
        return self.rawPrefix

    def pack(self, tuple):
        """Return the key for ``tuple`` inside this subspace."""
        return self.rawPrefix + fdb.tuple.pack(tuple)

    def unpack(self, key):
        """Return the tuple encoded in ``key`` after stripping the prefix.

        ``key`` must start with this subspace's prefix.
        """
        assert key.startswith(self.rawPrefix), 'key is not in this subspace'
        return fdb.tuple.unpack(key[len(self.rawPrefix):])

    def range(self, tuple=()):
        """Return a slice whose start/stop bound the keys under ``tuple``.

        The bounds are those of ``fdb.tuple.range(tuple)`` with this
        subspace's prefix prepended.
        """
        bounds = fdb.tuple.range(tuple)
        return slice(self.rawPrefix + bounds.start,
                     self.rawPrefix + bounds.stop)
# _ImplicitTransaction #
# A local class which is used to allow vector operations to be performed without
# explicitly passing a transaction. It is created by vector.use_transaction
# and is used as follows:
#     with vector.use_transaction(tr):
#         vector[0] = 1
#         vector.push(1)
#         ...
class _ImplicitTransaction(object):
    """Context manager that makes ``tr`` a vector's implicit transaction.

    While the ``with`` block is active, ``vector.local.tr`` holds ``tr``;
    on exit it is restored to the value it had when this object was
    created. Entering while a different transaction is already installed
    raises an Exception; re-entering with the same transaction is allowed.
    """

    def __init__(self, vector, tr):
        self.vector = vector
        self.tr = tr
        # ``local`` may have no ``tr`` attribute in the calling thread
        # (thread-local attributes are per thread), so fall back to None
        # instead of raising AttributeError.
        self.initialValue = getattr(vector.local, 'tr', None)

    def __enter__(self):
        """Install the transaction and return the vector."""
        current = getattr(self.vector.local, 'tr', None)
        if self.initialValue is not None and current != self.tr:
            raise Exception('use_transaction cannot be nested')
        self.vector.local.tr = self.tr
        return self.vector

    def __exit__(self, exc_type, exc_value, traceback):
        """Restore the transaction that was installed before construction."""
        self.vector.local.tr = self.initialValue
# Vector #
## Vector stores each of its values using its index as the key.
## The size of a vector is equal to the index of its last key + 1.
## For indexes smaller than the vector's size that have no associated key
## in the database, the value will be the specified defaultValue.
## If the last value in the vector has the default value, its key will
## always be set so that size can be determined.
## By creating Vector with a Subspace, all kv pairs modified by the
## layer will have keys that start within that Subspace.
class Vector(object):
    """Represents a potentially sparse array in FoundationDB.

    Each value is stored under a key built from its index inside
    ``subspace``. Indexes below the vector's size that have no key read
    back as ``defaultValue``. The last index always has a key, so the size
    is the index of the last key plus one.

    Every public operation takes an optional ``tr``. When it is omitted,
    the transaction installed for the calling thread via
    ``use_transaction`` is used; if there is none an Exception is raised.
    """

    class _ThreadState(threading.local):
        """Per-thread holder of the implicit transaction."""
        # Class-level default: threads that never installed a transaction
        # read None instead of hitting AttributeError.
        tr = None

    # Public functions

    def __init__(self, subspace, defaultValue=''):
        """Create a vector over ``subspace``.

        ``defaultValue`` is the value reported for indexes that have no
        stored key.
        """
        self.subspace = subspace
        self.defaultValue = defaultValue
        self.local = self._ThreadState()

    def use_transaction(self, tr):
        """
        Get an object that can be used in a with statement to perform operations
        on this vector without supplying a transaction as an argument to each operation.

        For example:

            with vector.use_transaction(tr):
                vector[0] = 1
        """
        return _ImplicitTransaction(self, tr)

    def size(self, tr=None):
        """Get the number of items in the Vector. This number includes the sparsely represented items."""
        return self._size(self._to_transaction(tr))

    def push(self, val, tr=None):
        """Push a single item onto the end of the Vector."""
        self._push(val, self._to_transaction(tr))

    def back(self, tr=None):
        """Get the value of the last item in the Vector (None if it is empty)."""
        return self._back(self._to_transaction(tr))

    def front(self, tr=None):
        """Get the value of the first item in the Vector."""
        return self._get(0, self._to_transaction(tr))

    def pop(self, tr=None):
        """Get and pop the last item off the Vector (None if it is empty)."""
        return self._pop(self._to_transaction(tr))

    def swap(self, i1, i2, tr=None):
        """Swap the items at positions i1 and i2.

        Raises IndexError if either index is outside the Vector.
        """
        self._swap(i1, i2, self._to_transaction(tr))

    def get(self, index, tr=None):
        """Get the item at the specified index.

        Raises IndexError if the index is outside the Vector.
        """
        return self._get(index, self._to_transaction(tr))

    def get_range(self, startIndex=None, endIndex=None, step=None, tr=None):
        """Get a range of items in the Vector, returned as a generator."""
        return self._get_range(startIndex, endIndex, step, self._to_transaction(tr))

    def set(self, index, val, tr=None):
        """Set the value at a particular index in the Vector."""
        self._set(index, val, self._to_transaction(tr))

    def empty(self, tr=None):
        """Test whether the Vector is empty."""
        return self._size(self._to_transaction(tr)) == 0

    def resize(self, length, tr=None):
        """Grow or shrink the size of the Vector."""
        self._resize(length, self._to_transaction(tr))

    def clear(self, tr=None):
        """Remove all items from the Vector."""
        self._clear(self._to_transaction(tr))

    # Vector supports array notation when combined with use_transaction

    def __setitem__(self, index, val):
        """Set the item at the specified index. Can only be used with use_transaction."""
        self.set(index, val)

    def __getitem__(self, index):
        """
        Get the item(s) at the specified index or range. Ranges are returned as a generator. Can only
        be used with use_transaction()
        """
        if isinstance(index, slice):
            return self.get_range(index.start, index.stop, index.step)
        return self.get(index)

    def __iter__(self):
        """Iterate over every item with a single range read. Can only be used with use_transaction()."""
        return self.get_range()

    # Private functions

    def _push(self, val, tr):
        # Use the transaction we were given; the public size() would ignore
        # an explicitly passed tr and look for the implicit one instead.
        tr[self._key_at(self._size(tr))] = fdb.tuple.pack((val,))

    def _back(self, tr):
        keyRange = self.subspace.range()
        # Limit 1, reversed: only the last key in the subspace is read.
        last = tr.get_range(keyRange.start, keyRange.stop, 1, True)
        for k, v in last:
            return fdb.tuple.unpack(v)[0]
        return None

    def _pop(self, tr):
        keyRange = self.subspace.range()

        # Read the last two entries so we can check if the second to last item
        # is being represented sparsely. If so, we will be required to set it
        # to the default value
        lastTwo = list(tr.get_range(keyRange.start, keyRange.stop, 2, True))

        # Vector was empty
        if not lastTwo:
            return None

        indices = [self.subspace.unpack(kv.key)[0] for kv in lastTwo]
        lastIndex = indices[0]

        # A vector of size one has no new last item to materialise. Otherwise
        # the second to last item is sparse when there is no second key or
        # when there is a gap between the last two stored indexes.
        if lastIndex > 0 and (len(lastTwo) == 1 or lastIndex > indices[1] + 1):
            tr[self._key_at(lastIndex - 1)] = fdb.tuple.pack((self.defaultValue,))

        del tr[lastTwo[0].key]
        return fdb.tuple.unpack(lastTwo[0].value)[0]

    def _swap(self, i1, i2, tr):
        currentSize = self._size(tr)

        # Valid indexes are 0 .. currentSize - 1, the same bound _get enforces.
        if i1 < 0 or i2 < 0 or i1 >= currentSize or i2 >= currentSize:
            raise IndexError('vector.swap: indices (%d, %d) out of range' % (i1, i2))

        if i1 == i2:
            return

        k1 = self._key_at(i1)
        k2 = self._key_at(i2)
        # Both reads are issued before any write below.
        v1 = tr[k1]
        v2 = tr[k2]
        lastIndex = currentSize - 1

        if v2.present():
            tr[k1] = v2
        elif i1 == lastIndex:
            # The last slot must keep a key so the size stays derivable;
            # receiving a sparse item means storing the default explicitly.
            tr[k1] = fdb.tuple.pack((self.defaultValue,))
        elif v1.present():
            del tr[k1]

        if v1.present():
            tr[k2] = v1
        elif i2 == lastIndex:
            tr[k2] = fdb.tuple.pack((self.defaultValue,))
        elif v2.present():
            del tr[k2]

    def _get(self, index, tr):
        if index < 0:
            raise IndexError('vector.get: index \'%d\' out of range' % index)

        start = self._key_at(index)
        end = self.subspace.range().stop

        # Read the first key at or after the requested index.
        output = tr.get_range(start, end, 1)
        for k, v in output:
            # The requested index had an associated key
            if start == k:
                return fdb.tuple.unpack(v)[0]
            # The requested index is sparsely represented
            return self.defaultValue

        # We requested a value past the end of the vector
        raise IndexError('vector.get: index \'%d\' out of range' % index)

    def _get_range(self, startIndex, endIndex, step, tr):
        size = self._size(tr)

        # Negative indexes count from the end and are clamped at 0.
        if startIndex is not None and startIndex < 0:
            startIndex = max(0, size + startIndex)
        if endIndex is not None and endIndex < 0:
            endIndex = max(0, size + endIndex)

        if step is None:
            if startIndex is None or endIndex is None or startIndex <= endIndex:
                step = 1
            else:
                step = -1
        elif step == 0:
            raise ValueError('vector.get_range: step cannot be zero')

        forward = step > 0
        wholeRange = self.subspace.range()

        # Translate the index bounds into key bounds. When walking backwards
        # startIndex is the inclusive upper bound and endIndex the exclusive
        # lower bound, hence the +1 on both.
        if forward:
            start = wholeRange.start if startIndex is None else self._key_at(startIndex)
            end = wholeRange.stop if endIndex is None else self._key_at(endIndex)
        else:
            end = wholeRange.stop if startIndex is None else self._key_at(startIndex + 1)
            start = wholeRange.start if endIndex is None else self._key_at(endIndex + 1)

        result = tr.get_range(start, end, 0, not forward)

        # currentIndex is the next index to report; stopIndex is the first
        # index (in walking direction) that must not be reported.
        if forward:
            currentIndex = 0 if startIndex is None else startIndex
            stopIndex = size if endIndex is None else min(endIndex, size)
        else:
            currentIndex = size - 1 if startIndex is None else min(startIndex, size - 1)
            stopIndex = -1 if endIndex is None else endIndex

        for k, v in result:
            keyIndex = self.subspace.unpack(k)[0]
            # Report defaults for the sparse indexes before this stored key.
            while (forward and currentIndex < keyIndex) or (not forward and currentIndex > keyIndex):
                currentIndex = currentIndex + step
                yield self.defaultValue

            # A stored key is skipped when the step jumps over its index.
            if currentIndex == keyIndex:
                currentIndex = currentIndex + step
                yield fdb.tuple.unpack(v)[0]

        # Report defaults for sparse indexes that lie after the last stored
        # key inside the requested range.
        while (forward and currentIndex < stopIndex) or (not forward and currentIndex > stopIndex):
            currentIndex = currentIndex + step
            yield self.defaultValue

    def _size(self, tr):
        keyRange = self.subspace.range()
        lastKey = tr.get_key(fdb.KeySelector.last_less_or_equal(keyRange.stop))
        # The selector may resolve to a key before this subspace, which
        # means the vector has no keys at all.
        if lastKey < keyRange.start:
            return 0
        return self.subspace.unpack(lastKey)[0] + 1

    def _set(self, index, val, tr):
        tr[self._key_at(index)] = fdb.tuple.pack((val,))

    def _resize(self, length, tr):
        # Use the transaction we were given rather than the implicit one.
        currentSize = self._size(tr)
        if length == currentSize:
            return
        if length < currentSize:
            self._shrink(tr, length, currentSize)
        else:
            self._expand(tr, length, currentSize)

    def _shrink(self, tr, length, currentSize):
        tr.clear_range(self._key_at(length), self.subspace.range().stop)

        # Check if the new end of the vector was being sparsely represented
        if self._size(tr) < length:
            tr[self._key_at(length - 1)] = fdb.tuple.pack((self.defaultValue,))

    def _expand(self, tr, length, currentSize):
        # Only the new last index needs a key; everything between the old
        # and new end stays sparse.
        tr[self._key_at(length - 1)] = fdb.tuple.pack((self.defaultValue,))

    def _clear(self, tr):
        del tr[self.subspace.range()]

    def _to_transaction(self, tr):
        # An explicit transaction wins; otherwise fall back to the one
        # installed for this thread by use_transaction.
        if tr is None:
            if self.local.tr is None:
                raise Exception('No transaction specified and use_transaction has not been called')
            return self.local.tr
        return tr

    def _key_at(self, index):
        return self.subspace.pack((index,))
# internal tests #
# caution: modifies the database!
def vector_test(tr):
    """Exercise the Vector operations and print the results.

    Works on the vector stored under the ('test_vector',) subspace with a
    default value of 0. All operations run through ``tr`` via
    ``use_transaction``. Caution: this modifies the database.

    The print calls use a single pre-formatted string so that the output
    is the same on Python 2 and Python 3.
    """
    vector = Vector(Subspace(('test_vector',)), 0)

    with vector.use_transaction(tr):
        print('Clearing any previous values in the vector')
        vector.clear()

        print('\nMODIFIERS')

        # Set + Push
        vector[0] = 1
        vector[1] = 2
        vector.push(3)
        _print_vector(vector, tr)

        # Swap
        vector.swap(0, 2)
        _print_vector(vector, tr)

        # Pop
        print('Popped: %s' % (vector.pop(),))
        _print_vector(vector, tr)

        # Clear
        vector.clear()

        print('Pop empty: %s' % (vector.pop(),))
        _print_vector(vector, tr)

        vector.push('Foo')
        print('Pop size 1: %s' % (vector.pop(),))
        _print_vector(vector, tr)

        # Capacity
        print('Size: %s' % (vector.size(),))
        print('Empty: %s' % (vector.empty(),))

        print('Resizing to length 5')
        vector.resize(5)
        _print_vector(vector, tr)
        print('Size: %s' % (vector.size(),))

        print('Setting values')
        vector[0] = 'The'
        vector[1] = 'Quick'
        vector[2] = 'Brown'
        vector[3] = 'Fox'
        vector[4] = 'Jumps'
        vector[5] = 'Over'
        _print_vector(vector, tr)

        print('\nFRONT')
        print(vector.front())

        print('\nBACK')
        print(vector.back())

        print('Index 0: %s' % (vector[0],))
        print('Index 5: %s' % (vector[5],))

        _print_vector(vector, tr)
        print('Size: %s' % (vector.size(),))

        print('\nRESIZING')
        print('Resizing to 3')
        vector.resize(3)
        _print_vector(vector, tr)
        print('Size: %s' % (vector.size(),))

        print('Resizing to 3 again')
        vector.resize(3)
        _print_vector(vector, tr)
        print('Size: %s' % (vector.size(),))

        print('Resizing to 6')
        vector.resize(6)
        _print_vector(vector, tr)
        print('Size: %s' % (vector.size(),))

        print('\nSPARSE TESTS')
        print('Popping sparse vector')
        vector.pop()
        _print_vector(vector, tr)
        print('Size: %s' % (vector.size(),))

        print('Resizing to 4')
        vector.resize(4)
        _print_vector(vector, tr)
        print('Size: %s' % (vector.size(),))

        print('Adding "word" to index 10, resize to 25')
        vector[10] = 'word'
        vector.resize(25)
        _print_vector(vector, tr)
        print('Size: %s' % (vector.size(),))

        print('Popping sparse vector')
        vector.pop()
        _print_vector(vector, tr)
        print('Size: %s' % (vector.size(),))

        print('Swapping with sparse element')
        vector.swap(10, 15)
        _print_vector(vector, tr)
        print('Size: %s' % (vector.size(),))

        print('Swapping sparse elements')
        vector.swap(12, 13)
        _print_vector(vector, tr)
        print('Size: %s' % (vector.size(),))
# Vector sample usage #
import sys
# caution: modifies the database!
def vector_example(tr):
    """Show basic Vector usage and print the vector after each step.

    Works on the vector stored under the ('my_vector',) subspace with a
    default value of 0. Caution: this modifies the database.
    """
    vector = Vector(Subspace(('my_vector',)), 0)

    with vector.use_transaction(tr):
        # Start from a known state so repeated runs print the same thing.
        vector.clear()

        for i in range(0, 5):
            vector.push(i)
        _print_vector(vector, tr)

        print('Pop last item: %d' % vector.pop())
        _print_vector(vector, tr)

        vector[1] = 10
        # Index 10 must exist before it can take part in a swap.
        vector.resize(11)
        _print_vector(vector, tr)

        vector.swap(1, 10)
        _print_vector(vector, tr)
def _print_vector(vector, tr):
    """Write the vector's items to stdout on one line.

    Items are read by iterating ``vector`` inside
    ``vector.use_transaction(tr)``. Each item is written as its ``repr``,
    items are separated by commas, and the line ends with a newline (an
    empty vector produces an empty line).
    """
    with vector.use_transaction(tr):
        line = ','.join(repr(item) for item in vector)
    sys.stdout.write(line + '\n')
# caution: modifies the database!
if __name__ == '__main__':
db = fdb.open()