Merge branch 'master' into mengxu/fr-code-improvement-PR

2020-04-22 08:57:27 -07:00 · 2020-04-22 08:57:27 -07:00 · 38193a3866
parent 9b2f6d5c13 9b4a1ca78f
commit 38193a3866
24 changed files with 639 additions and 148 deletions
--- a/14
+++ b/14
@ -504,4 +504,16 @@ Armon Dadgar (ART)
    LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
           ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
           (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Copyright (C) 2009 The Guava Authors
+	
+	Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+	in compliance with the License. You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+ 	Unless required by applicable law or agreed to in writing, software distributed under the License
+	is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+	or implied. See the License for the specific language governing permissions and limitations under
+	the License.
--- a/bindings/java/CMakeLists.txt
+++ b/bindings/java/CMakeLists.txt
@ -56,6 +56,7 @@ set(JAVA_BINDING_SRCS
  src/main/com/apple/foundationdb/testing/Promise.java
  src/main/com/apple/foundationdb/testing/PerfMetric.java
  src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java
+  src/main/com/apple/foundationdb/tuple/FastByteComparisons.java
  src/main/com/apple/foundationdb/tuple/IterableComparator.java
  src/main/com/apple/foundationdb/tuple/package-info.java
  src/main/com/apple/foundationdb/tuple/StringUtil.java
--- a/bindings/java/fdbJNI.cpp
+++ b/bindings/java/fdbJNI.cpp
@ -36,6 +36,11 @@ static JavaVM* g_jvm = nullptr;
 static thread_local JNIEnv* g_thread_jenv = nullptr;  // Defined for the network thread once it is running, and for any thread that has called registerCallback
 static thread_local jmethodID g_IFutureCallback_call_methodID = JNI_NULL;
 static thread_local bool is_external = false;
+static jclass range_result_summary_class;
+static jclass range_result_class;
+static jclass string_class;
+static jmethodID range_result_init;
+static jmethodID range_result_summary_init;

 void detachIfExternalThread(void *ignore) {
 	if(is_external && g_thread_jenv != nullptr) {
@ -275,10 +280,9 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureStrings_FutureString
 		return JNI_NULL;
 	}

-	jclass str_clazz = jenv->FindClass("java/lang/String");
 	if( jenv->ExceptionOccurred() )
 		return JNI_NULL;
-	jobjectArray arr = jenv->NewObjectArray(count, str_clazz, JNI_NULL);
+	jobjectArray arr = jenv->NewObjectArray(count, string_class, JNI_NULL);
 	if( !arr ) {
 		if( !jenv->ExceptionOccurred() )
 			throwOutOfMem(jenv);
@ -306,13 +310,6 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureResults_FutureResult
 		throwParamNotNull(jenv);
 		return JNI_NULL;
 	}
-
-	jclass resultCls = jenv->FindClass("com/apple/foundationdb/RangeResultSummary");
-	if( jenv->ExceptionOccurred() )
-		return JNI_NULL;
-	jmethodID resultCtorId = jenv->GetMethodID(resultCls, "<init>", "([BIZ)V");
-	if( jenv->ExceptionOccurred() )
-		return JNI_NULL;
 
 	FDBFuture *f = (FDBFuture *)future;

@ -337,7 +334,7 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureResults_FutureResult
 		jenv->SetByteArrayRegion(lastKey, 0, kvs[count - 1].key_length, (jbyte *)kvs[count - 1].key);
 	}

-	jobject result = jenv->NewObject(resultCls, resultCtorId, lastKey, count, (jboolean)more);
+	jobject result = jenv->NewObject(range_result_summary_class, range_result_summary_init, lastKey, count, (jboolean)more);
 	if( jenv->ExceptionOccurred() )
 		return JNI_NULL;

@ -350,9 +347,6 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureResults_FutureResult
 		throwParamNotNull(jenv);
 		return JNI_NULL;
 	}
-
-	jclass resultCls = jenv->FindClass("com/apple/foundationdb/RangeResult");
-	jmethodID resultCtorId = jenv->GetMethodID(resultCls, "<init>", "([B[IZ)V");
 
 	FDBFuture *f = (FDBFuture *)future;

@ -414,7 +408,7 @@ JNIEXPORT jobject JNICALL Java_com_apple_foundationdb_FutureResults_FutureResult
 	jenv->ReleaseByteArrayElements(keyValueArray, (jbyte *)keyvalues_barr, 0);
 	jenv->ReleaseIntArrayElements(lengthArray, length_barr, 0);

-	jobject result = jenv->NewObject(resultCls, resultCtorId, keyValueArray, lengthArray, (jboolean)more);
+	jobject result = jenv->NewObject(range_result_class, range_result_init, keyValueArray, lengthArray, (jboolean)more);
 	if( jenv->ExceptionOccurred() )
 		return JNI_NULL;

@ -1042,8 +1036,43 @@ JNIEXPORT void JNICALL Java_com_apple_foundationdb_FDB_Network_1stop(JNIEnv *jen
 }

 jint JNI_OnLoad(JavaVM *vm, void *reserved) {
+    JNIEnv *env;
 	g_jvm = vm;
-	return JNI_VERSION_1_1;
+	if (vm->GetEnv((void**)&env, JNI_VERSION_1_6) != JNI_OK) {
+		return JNI_ERR;
+	} else {
+		jclass local_range_result_class = env->FindClass("com/apple/foundationdb/RangeResult");
+		range_result_init = env->GetMethodID(local_range_result_class, "<init>", "([B[IZ)V");
+		range_result_class = (jclass) (env)->NewGlobalRef(local_range_result_class);
+
+		jclass local_range_result_summary_class = env->FindClass("com/apple/foundationdb/RangeResultSummary");
+		range_result_summary_init = env->GetMethodID(local_range_result_summary_class, "<init>", "([BIZ)V");
+		range_result_summary_class = (jclass) (env)->NewGlobalRef(local_range_result_summary_class);
+
+		jclass local_string_class = env->FindClass("java/lang/String");
+		string_class = (jclass) (env)->NewGlobalRef(local_string_class);
+
+		return JNI_VERSION_1_6;
+	}
+}
+
+// Called automatically by the JVM once the class loader that loaded this library is destroyed
+void JNI_OnUnload(JavaVM *vm, void *reserved) {
+	JNIEnv* env;
+	if (vm->GetEnv((void**)&env, JNI_VERSION_1_6) != JNI_OK) {
+		return; // no usable JNIEnv, so the global references cannot be released here
+	} else {
+		// release the global class references created in JNI_OnLoad so the GC can collect the classes
+		if (range_result_summary_class != NULL) {
+			env->DeleteGlobalRef(range_result_summary_class);
+		}
+		if (range_result_class != NULL) {
+			env->DeleteGlobalRef(range_result_class);
+		}
+		if (string_class != NULL) {
+			env->DeleteGlobalRef(string_class);
+		}
+	}
 }

 #ifdef __cplusplus
--- a/bindings/java/src/junit/com/apple/foundationdb/tuple/ArrayUtilTests.java
+++ b/bindings/java/src/junit/com/apple/foundationdb/tuple/ArrayUtilTests.java
@ -304,4 +304,58 @@ public class ArrayUtilTests {
 		fail("Not yet implemented");
 	}

+	private static final int SAMPLE_COUNT = 1000000;
+	private static final int SAMPLE_MAX_SIZE = 2048;
+	private List<byte[]> unsafe;
+	private List<byte[]> java;
+	/** Fills both lists with the same SAMPLE_COUNT randomly sized, randomly filled byte arrays. */
+	@Before
+	public void init() {
+		unsafe = new ArrayList<>(SAMPLE_COUNT);
+		java = new ArrayList<>(SAMPLE_COUNT);
+		Random random = new Random();
+		// Strictly less than: exactly SAMPLE_COUNT samples, matching the capacity reserved above.
+		for (int i = 0; i < SAMPLE_COUNT; i++) {
+			byte[] addition = new byte[random.nextInt(SAMPLE_MAX_SIZE)];
+			random.nextBytes(addition);
+			unsafe.add(addition);
+			java.add(addition);
+		}
+	}
+
+	@Test
+	public void testComparatorSort() {
+		Collections.sort(unsafe, FastByteComparisons.lexicographicalComparerUnsafeImpl());
+		Collections.sort(java, FastByteComparisons.lexicographicalComparerJavaImpl());
+		Assert.assertTrue(unsafe.equals(java));
+	}
+
+	/** The Unsafe-based comparer must report two arrays with equal content as equal. */
+	@Test
+	public void testUnsafeComparison() {
+		for (int i = 0; i < SAMPLE_COUNT; i++) {
+			// Compare against a copy: both lists hold the same array instances, and comparing
+			// an array with itself only exercises the comparer's identity short-circuit.
+			byte[] copy = java.get(i).clone();
+			Assert.assertEquals(0, FastByteComparisons.lexicographicalComparerUnsafeImpl().compare(unsafe.get(i), copy));
+		}
+	}
+
+	/** The pure-Java comparer must report two arrays with equal content as equal. */
+	@Test
+	public void testJavaComparison() {
+		for (int i = 0; i < SAMPLE_COUNT; i++) {
+			// Compare against a copy: both lists hold the same array instances, and comparing
+			// an array with itself only exercises the comparer's identity short-circuit.
+			byte[] copy = java.get(i).clone();
+			Assert.assertEquals(0, FastByteComparisons.lexicographicalComparerJavaImpl().compare(unsafe.get(i), copy));
+		}
+	}
+
+	/** The Unsafe-based comparer must report equal content as equal when both ranges start at offset 4. */
+	@Test
+	public void testUnsafeComparisonWithOffet() {
+		for (int i = 0; i < SAMPLE_COUNT; i++) {
+			byte[] left = unsafe.get(i);
+			if (left.length > 5) {
+				// Compare against a copy: the same buffer with the same offset and length
+				// would return through the identity short-circuit without reading any bytes.
+				byte[] right = java.get(i).clone();
+				Assert.assertEquals(0, FastByteComparisons.lexicographicalComparerUnsafeImpl().compareTo(left, 4, left.length - 4, right, 4, right.length - 4));
+			}
+		}
+	}
+
+	/** The pure-Java comparer must report equal content as equal when both ranges start at offset 4. */
+	@Test
+	public void testJavaComparisonWithOffset() {
+		for (int i = 0; i < SAMPLE_COUNT; i++) {
+			byte[] left = unsafe.get(i);
+			if (left.length > 5) {
+				// Compare against a copy: the same buffer with the same offset and length
+				// would return through the identity short-circuit without reading any bytes.
+				byte[] right = java.get(i).clone();
+				Assert.assertEquals(0, FastByteComparisons.lexicographicalComparerJavaImpl().compareTo(left, 4, left.length - 4, right, 4, right.length - 4));
+			}
+		}
+	}
+
 }
--- a/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java
+++ b/bindings/java/src/main/com/apple/foundationdb/tuple/ByteArrayUtil.java
@ -34,7 +34,7 @@ import com.apple.foundationdb.Transaction;
 *  {@link #printable(byte[])} for debugging non-text keys and values.
 *
 */
-public class ByteArrayUtil {
+public class ByteArrayUtil extends FastByteComparisons {

 	/**
 	 * Joins a set of byte arrays into a larger array. The {@code interlude} is placed
@ -135,11 +135,7 @@ public class ByteArrayUtil {
 		if(src.length < start + pattern.length)
 			return false;

-		for(int i = 0; i < pattern.length; i++)
-			if(pattern[i] != src[start + i])
-				return false;
-
-		return true;
+		return compareTo(src, start, pattern.length, pattern, 0, pattern.length) == 0;
 	}

 	/**
@ -307,14 +303,7 @@ public class ByteArrayUtil {
 	 *  {@code r}.
 	 */
 	public static int compareUnsigned(byte[] l, byte[] r) {
-		for(int idx = 0; idx < l.length && idx < r.length; ++idx) {
-			if(l[idx] != r[idx]) {
-				return (l[idx] & 0xFF) < (r[idx] & 0xFF) ? -1 : 1;
-			}
-		}
-		if(l.length == r.length)
-			return 0;
-		return l.length < r.length ? -1 : 1;
+		return compareTo(l, 0, l.length, r, 0, r.length);
 	}

 	/**
@ -328,15 +317,11 @@ public class ByteArrayUtil {
 	 * @return {@code true} if {@code array} starts with {@code prefix}
 	 */
 	public static boolean startsWith(byte[] array, byte[] prefix) {
+		// Short Circuit
 		if(array.length < prefix.length) {
 			return false;
 		}
-		for(int i = 0; i < prefix.length; ++i) {
-			if(prefix[i] != array[i]) {
-				return false;
-			}
-		}
-		return true;
+		return compareTo(array, 0, prefix.length, prefix, 0, prefix.length) == 0;
 	}

 	/**
--- a/bindings/java/src/main/com/apple/foundationdb/tuple/FastByteComparisons.java
+++ b/bindings/java/src/main/com/apple/foundationdb/tuple/FastByteComparisons.java
@ -0,0 +1,294 @@
+/*
+ * FastByteComparisons.java
+ *
+ * This source file is part of the FoundationDB open source project
+ *
+ * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.apple.foundationdb.tuple;
+
+import java.lang.reflect.Field;
+import java.nio.ByteOrder;
+import java.security.AccessController;
+import java.security.PrivilegedAction;
+import java.util.Comparator;
+
+import sun.misc.Unsafe;
+
+
+/**
+ * Utility code to do optimized byte-array comparison.
+ * This is borrowed and slightly modified from Guava's {@code UnsignedBytes}
+ * class to be able to compare arrays that start at non-zero offsets.
+ */
+abstract class FastByteComparisons {
+
+    private static final int UNSIGNED_MASK = 0xFF;
+    /**
+     * Lexicographically compare two byte arrays.
+     * 
+     * @param buffer1 left operand, expected to not be null
+     * @param buffer2 right operand, expected to not be null
+     * @param offset1 Where to start comparing in the left buffer, expected to be >= 0
+     * @param offset2 Where to start comparing in the right buffer, expected to be >= 0
+     * @param length1 How much to compare from the left buffer, expected to be >= 0
+     * @param length2 How much to compare from the right buffer, expected to be >= 0
+     * @return 0 if equal, < 0 if left is less than right, etc.
+     */
+    public static int compareTo(byte[] buffer1, int offset1, int length1,
+            byte[] buffer2, int offset2, int length2) {
+        return LexicographicalComparerHolder.BEST_COMPARER.compareTo(
+                buffer1, offset1, length1, buffer2, offset2, length2);
+    }
+    /**
+     * Interface for both the java and unsafe comparators + offset based comparisons.
+     * @param <T>
+     */
+    interface Comparer<T> extends Comparator<T> {
+        /**
+         * Lexicographically compare two byte arrays.
+         * 
+         * @param buffer1 left operand
+         * @param buffer2 right operand
+         * @param offset1 Where to start comparing in the left buffer
+         * @param offset2 Where to start comparing in the right buffer
+         * @param length1 How much to compare from the left buffer
+         * @param length2 How much to compare from the right buffer
+         * @return 0 if equal, < 0 if left is less than right, etc.
+         */
+        abstract public int compareTo(T buffer1, int offset1, int length1,
+                                      T buffer2, int offset2, int length2);
+    }
+
+    /**
+     * Pure Java Comparer
+     *
+     * @return
+     */
+    static Comparer<byte[]> lexicographicalComparerJavaImpl() {
+        return LexicographicalComparerHolder.PureJavaComparer.INSTANCE;
+    }
+
+    /**
+     * Unsafe Comparer
+     *
+     * @return
+     */
+    static Comparer<byte[]> lexicographicalComparerUnsafeImpl() {
+        return LexicographicalComparerHolder.UnsafeComparer.INSTANCE;
+    }
+
+
+    /**
+     * Provides a lexicographical comparer implementation; either a Java
+     * implementation or a faster implementation based on {@link Unsafe}.
+     *
+     * <p>Uses reflection to gracefully fall back to the Java implementation if
+     * {@code Unsafe} isn't available.
+     */
+    private static class LexicographicalComparerHolder {
+        static final String UNSAFE_COMPARER_NAME =
+                LexicographicalComparerHolder.class.getName() + "$UnsafeComparer";
+
+        static final Comparer<byte[]> BEST_COMPARER = getBestComparer();
+        /**
+         * Returns the Unsafe-using Comparer, or falls back to the pure-Java
+         * implementation if unable to do so.
+         */
+        static Comparer<byte[]> getBestComparer() {
+            String arch = System.getProperty("os.arch");
+            boolean unaligned = arch.equals("i386") || arch.equals("x86")
+                    || arch.equals("amd64") || arch.equals("x86_64");
+            if (!unaligned)
+                return lexicographicalComparerJavaImpl();
+            try {
+                Class<?> theClass = Class.forName(UNSAFE_COMPARER_NAME);
+
+                // yes, UnsafeComparer does implement Comparer<byte[]>
+                @SuppressWarnings("unchecked")
+                Comparer<byte[]> comparer =
+                        (Comparer<byte[]>) theClass.getEnumConstants()[0];
+                return comparer;
+            } catch (Throwable t) { // ensure we really catch *everything*
+                return lexicographicalComparerJavaImpl();
+            }
+        }
+
+        /**
+         * Java Comparer doing byte by byte comparisons
+         *
+         */
+        enum PureJavaComparer implements Comparer<byte[]> {
+            INSTANCE;
+
+            /**
+             *
+             * CompareTo looking at two buffers.
+             *
+             * @param buffer1 left operand
+             * @param buffer2 right operand
+             * @param offset1 Where to start comparing in the left buffer
+             * @param offset2 Where to start comparing in the right buffer
+             * @param length1 How much to compare from the left buffer
+             * @param length2 How much to compare from the right buffer
+             * @return 0 if equal, < 0 if left is less than right, etc.
+             */
+            @Override
+            public int compareTo(byte[] buffer1, int offset1, int length1,
+                                 byte[] buffer2, int offset2, int length2) {
+                // Short circuit equal case
+                if (buffer1 == buffer2 &&
+                        offset1 == offset2 &&
+                        length1 == length2) {
+                    return 0;
+                }
+                int end1 = offset1 + length1;
+                int end2 = offset2 + length2;
+                for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
+                    int a = (buffer1[i] & UNSIGNED_MASK);
+                    int b = (buffer2[j] & UNSIGNED_MASK);
+                    if (a != b) {
+                        return a - b;
+                    }
+                }
+                return length1 - length2;
+            }
+
+            /**
+             * Supports Comparator
+             *
+             * @param o1
+             * @param o2
+             * @return comparison
+             */
+            @Override
+            public int compare(byte[] o1, byte[] o2) {
+                return compareTo(o1, 0, o1.length, o2, 0, o2.length);
+            }
+        }
+
+        /**
+         *
+         * Takes advantage of word based comparisons
+         *
+         */
+        @SuppressWarnings("unused") // used via reflection
+        enum UnsafeComparer implements Comparer<byte[]> {
+            INSTANCE;
+
+            static final Unsafe theUnsafe;
+
+            /**
+             * The offset to the first element in a byte array.
+             */
+            static final int BYTE_ARRAY_BASE_OFFSET;
+
+            @Override
+            public int compare(byte[] o1, byte[] o2) {
+                return compareTo(o1, 0, o1.length, o2, 0, o2.length);
+            }
+
+            static {
+                theUnsafe = (Unsafe) AccessController.doPrivileged(
+                        (PrivilegedAction<Object>) () -> {
+                            try {
+                                Field f = Unsafe.class.getDeclaredField("theUnsafe");
+                                f.setAccessible(true);
+                                return f.get(null);
+                            } catch (NoSuchFieldException e) {
+                                // It doesn't matter what we throw;
+                                // it's swallowed in getBestComparer().
+                                throw new Error();
+                            } catch (IllegalAccessException e) {
+                                throw new Error();
+                            }
+                        });
+
+                BYTE_ARRAY_BASE_OFFSET = theUnsafe.arrayBaseOffset(byte[].class);
+
+                // sanity check - this should never fail
+                if (theUnsafe.arrayIndexScale(byte[].class) != 1) {
+                    throw new AssertionError();
+                }
+            }
+
+            static final boolean LITTLE_ENDIAN =
+                    ByteOrder.nativeOrder().equals(ByteOrder.LITTLE_ENDIAN);
+
+            /**
+             * Lexicographically compare two arrays.
+             *
+             * @param buffer1 left operand
+             * @param buffer2 right operand
+             * @param offset1 Where to start comparing in the left buffer
+             * @param offset2 Where to start comparing in the right buffer
+             * @param length1 How much to compare from the left buffer
+             * @param length2 How much to compare from the right buffer
+             * @return 0 if equal, < 0 if left is less than right, etc.
+             */
+            @Override
+            public int compareTo(byte[] buffer1, int offset1, int length1,
+                                 byte[] buffer2, int offset2, int length2) {
+                // Short circuit equal case
+                if (buffer1 == buffer2 &&
+                        offset1 == offset2 &&
+                        length1 == length2) {
+                    return 0;
+                }
+                final int stride = 8;
+                final int minLength = Math.min(length1, length2);
+                int strideLimit = minLength & ~(stride - 1);
+                final long offset1Adj = offset1 + BYTE_ARRAY_BASE_OFFSET;
+                final long offset2Adj = offset2 + BYTE_ARRAY_BASE_OFFSET;
+                int i;
+
+                /*
+                 * Compare 8 bytes at a time. Benchmarking on x86 shows a stride of 8 bytes is no slower
+                 * than 4 bytes even on 32-bit. On the other hand, it is substantially faster on 64-bit.
+                 */
+                for (i = 0; i < strideLimit; i += stride) {
+                    long lw = theUnsafe.getLong(buffer1, offset1Adj + i);
+                    long rw = theUnsafe.getLong(buffer2, offset2Adj + i);
+                    if (lw != rw) {
+                        if(!LITTLE_ENDIAN) {
+                            return ((lw + Long.MIN_VALUE) < (rw + Long.MIN_VALUE)) ? -1 : 1;
+                        }
+
+                        /*
+                         * We want to compare only the first index where left[index] != right[index]. This
+                         * corresponds to the least significant nonzero byte in lw ^ rw, since lw and rw are
+                         * little-endian. Long.numberOfTrailingZeros(diff) tells us the least significant
+                         * nonzero bit, and zeroing out the first three bits of L.nTZ gives us the shift to get
+                         * that least significant nonzero byte. This comparison logic is based on UnsignedBytes
+                         * comparator from guava v21
+                         */
+                        int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7;
+                        return ((int) ((lw >>> n) & UNSIGNED_MASK)) - ((int) ((rw >>> n) & UNSIGNED_MASK));
+                    }
+                }
+
+                // The epilogue to cover the last (minLength % stride) elements.
+                for (; i < minLength; i++) {
+                    int a = (buffer1[offset1 + i] & UNSIGNED_MASK);
+                    int b = (buffer2[offset2 + i] & UNSIGNED_MASK);
+                    if (a != b) {
+                        return a - b;
+                    }
+                }
+                return length1 - length2;
+            }
+        }
+    }
+}
--- a/build/gen_dev_docker.sh
+++ b/build/gen_dev_docker.sh
@ -75,7 +75,7 @@ sudo docker run --rm `# delete (temporary) image after return` \\
                ${image} "\$@"
 EOF

-cat <<EOF $HOME/bin/clangd
+cat <<EOF > $HOME/bin/clangd
 #!/usr/bin/bash

 fdb-dev scl enable devtoolset-8 rh-python36 rh-ruby24 -- clangd
--- a/cmake/InstallLayout.cmake
+++ b/cmake/InstallLayout.cmake
@ -320,9 +320,14 @@ set(CPACK_RPM_SERVER-EL7_USER_FILELIST
  "%config(noreplace) /etc/foundationdb/foundationdb.conf"
  "%attr(0700,foundationdb,foundationdb) /var/log/foundationdb"
  "%attr(0700, foundationdb, foundationdb) /var/lib/foundationdb")
+set(CPACK_RPM_CLIENTS-EL6_USER_FILELIST "%dir /etc/foundationdb")
+set(CPACK_RPM_CLIENTS-EL7_USER_FILELIST "%dir /etc/foundationdb")
 set(CPACK_RPM_EXCLUDE_FROM_AUTO_FILELIST_ADDITION
  "/usr/sbin"
  "/usr/share/java"
+  "/usr/lib64/cmake"
+  "/etc/foundationdb"
+  "/usr/lib64/pkgconfig"
  "/usr/lib64/python2.7"
  "/usr/lib64/python2.7/site-packages"
  "/var"
--- a/fdbclient/BackupAgent.actor.h
+++ b/fdbclient/BackupAgent.actor.h
@ -934,5 +934,10 @@ struct StringRefReader {
 	Error failure_error;
 };

+namespace fileBackup {
+ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<IAsyncFile> file, int64_t offset,
+                                                                      int len);
+}
+
 #include "flow/unactorcompiler.h"
 #endif
--- a/fdbclient/BackupContainer.actor.cpp
+++ b/fdbclient/BackupContainer.actor.cpp
@ -20,6 +20,7 @@

 #include "fdbclient/BackupContainer.h"
 #include "fdbclient/BackupAgent.actor.h"
+#include "fdbclient/FDBTypes.h"
 #include "fdbclient/JsonBuilder.h"
 #include "flow/Trace.h"
 #include "flow/UnitTest.h"
@ -424,9 +425,11 @@ public:
 	}

 	// TODO:  Do this more efficiently, as the range file list for a snapshot could potentially be hundreds of megabytes.
-	ACTOR static Future<std::vector<RangeFile>> readKeyspaceSnapshot_impl(Reference<BackupContainerFileSystem> bc, KeyspaceSnapshotFile snapshot) {
+	ACTOR static Future<std::pair<std::vector<RangeFile>, std::map<std::string, KeyRange>>> readKeyspaceSnapshot_impl(
+	    Reference<BackupContainerFileSystem> bc, KeyspaceSnapshotFile snapshot) {
 		// Read the range file list for the specified version range, and then index them by fileName.
-		// This is so we can verify that each of the files listed in the manifest file are also in the container at this time.
+		// This is so we can verify that each of the files listed in the manifest file are also in the container at this
+		// time.
 		std::vector<RangeFile> files = wait(bc->listRangeFiles(snapshot.beginVersion, snapshot.endVersion));
 		state std::map<std::string, RangeFile> rangeIndex;
 		for(auto &f : files)
@ -482,15 +485,38 @@ public:
 			throw restore_missing_data();
 		}

-		return results;
+		// Check key ranges for files
+		std::map<std::string, KeyRange> fileKeyRanges;
+		JSONDoc ranges = doc.subDoc("keyRanges"); // Create an empty doc if not existed
+		for (auto i : ranges.obj()) {
+			const std::string& filename = i.first;
+			JSONDoc fields(i.second);
+			std::string begin, end;
+			if (fields.tryGet("beginKey", begin) && fields.tryGet("endKey", end)) {
+				TraceEvent("ManifestFields")
+				    .detail("File", filename)
+				    .detail("Begin", printable(StringRef(begin)))
+				    .detail("End", printable(StringRef(end)));
+				fileKeyRanges.emplace(filename, KeyRange(KeyRangeRef(StringRef(begin), StringRef(end))));
+			} else {
+				TraceEvent("MalFormattedManifest").detail("Key", filename);
+				throw restore_corrupted_data();
+			}
+		}
+
+		return std::make_pair(results, fileKeyRanges);
 	}

-	Future<std::vector<RangeFile>> readKeyspaceSnapshot(KeyspaceSnapshotFile snapshot) {
+	Future<std::pair<std::vector<RangeFile>, std::map<std::string, KeyRange>>> readKeyspaceSnapshot(
+	    KeyspaceSnapshotFile snapshot) {
 		return readKeyspaceSnapshot_impl(Reference<BackupContainerFileSystem>::addRef(this), snapshot);
 	}

-	ACTOR static Future<Void> writeKeyspaceSnapshotFile_impl(Reference<BackupContainerFileSystem> bc, std::vector<std::string> fileNames, int64_t totalBytes) {
-		ASSERT(!fileNames.empty());
+	ACTOR static Future<Void> writeKeyspaceSnapshotFile_impl(Reference<BackupContainerFileSystem> bc,
+	                                                         std::vector<std::string> fileNames,
+	                                                         std::vector<std::pair<Key, Key>> beginEndKeys,
+	                                                         int64_t totalBytes) {
+		ASSERT(!fileNames.empty() && fileNames.size() == beginEndKeys.size());

 		state Version minVer = std::numeric_limits<Version>::max();
 		state Version maxVer = 0;
@ -521,6 +547,13 @@ public:
 		doc.create("beginVersion") = minVer;
 		doc.create("endVersion") = maxVer;

+		auto ranges = doc.subDoc("keyRanges");
+		for (int i = 0; i < beginEndKeys.size(); i++) {
+			auto fileDoc = ranges.subDoc(fileNames[i], /*split=*/false);
+			fileDoc.create("beginKey") = beginEndKeys[i].first.toString();
+			fileDoc.create("endKey") = beginEndKeys[i].second.toString();
+		}
+
 		wait(yield());
 		state std::string docString = json_spirit::write_string(json);

@ -531,8 +564,11 @@ public:
 		return Void();
 	}

-	Future<Void> writeKeyspaceSnapshotFile(std::vector<std::string> fileNames, int64_t totalBytes) final {
-		return writeKeyspaceSnapshotFile_impl(Reference<BackupContainerFileSystem>::addRef(this), fileNames, totalBytes);
+	Future<Void> writeKeyspaceSnapshotFile(const std::vector<std::string>& fileNames,
+	                                       const std::vector<std::pair<Key, Key>>& beginEndKeys,
+	                                       int64_t totalBytes) final {
+		return writeKeyspaceSnapshotFile_impl(Reference<BackupContainerFileSystem>::addRef(this), fileNames,
+		                                      beginEndKeys, totalBytes);
 	};

 	// List log files, unsorted, which contain data at any version >= beginVersion and <= targetVersion.
@ -1193,7 +1229,10 @@ public:
 		std::vector<LogFile> filtered;
 		int i = 0;
 		for (int j = 1; j < logs.size(); j++) {
-			if (logs[j].isSubset(logs[i])) continue;
+			if (logs[j].isSubset(logs[i])) {
+				ASSERT(logs[j].fileSize <= logs[i].fileSize);
+				continue;
+			}

 			if (!logs[i].isSubset(logs[j])) {
 				filtered.push_back(logs[i]);
@ -1249,6 +1288,7 @@ public:
 			// filter out if indices.back() is subset of files[i] or vice versa
 			if (!indices.empty()) {
 				if (logs[indices.back()].isSubset(logs[i])) {
+					ASSERT(logs[indices.back()].fileSize <= logs[i].fileSize);
 					indices.back() = i;
 				} else if (!logs[i].isSubset(logs[indices.back()])) {
 					indices.push_back(i);
@ -1291,6 +1331,29 @@ public:
 		return end;
 	}

+	// Reads every block of the given range file and returns the key range formed by the
+	// first key of the first block and the last key of the last block.
+	ACTOR static Future<KeyRange> getSnapshotFileKeyRange_impl(Reference<BackupContainerFileSystem> bc,
+	                                                           RangeFile file) {
+		state Reference<IAsyncFile> inFile = wait(bc->readFile(file.fileName));
+		state bool beginKeySet = false;
+		state Key beginKey;
+		state Key endKey;
+		state int64_t j = 0;
+		for (; j < file.fileSize; j += file.blockSize) {
+			// The final block may be shorter than blockSize.
+			int64_t len = std::min<int64_t>(file.blockSize, file.fileSize - j);
+			Standalone<VectorRef<KeyValueRef>> blockData = wait(fileBackup::decodeRangeFileBlock(inFile, j, len));
+			if (!beginKeySet) {
+				// Only the first block supplies the begin key; without setting the flag the
+				// begin key would be overwritten by the first key of every later block.
+				beginKey = blockData.front().key;
+				beginKeySet = true;
+			}
+			endKey = blockData.back().key;
+		}
+		return KeyRange(KeyRangeRef(beginKey, endKey));
+	}
+
+	Future<KeyRange> getSnapshotFileKeyRange(const RangeFile& file) final {
+		ASSERT(g_network->isSimulated());
+		return getSnapshotFileKeyRange_impl(Reference<BackupContainerFileSystem>::addRef(this), file);
+	}
+
 	ACTOR static Future<Optional<RestorableFileSet>> getRestoreSet_impl(Reference<BackupContainerFileSystem> bc, Version targetVersion) {
 		// Find the most recent keyrange snapshot to end at or before targetVersion
 		state Optional<KeyspaceSnapshotFile> snapshot;
@ -1305,12 +1368,26 @@ public:
 			restorable.snapshot = snapshot.get();
 			restorable.targetVersion = targetVersion;

-			std::vector<RangeFile> ranges = wait(bc->readKeyspaceSnapshot(snapshot.get()));
-			restorable.ranges = ranges;
+			std::pair<std::vector<RangeFile>, std::map<std::string, KeyRange>> results =
+			    wait(bc->readKeyspaceSnapshot(snapshot.get()));
+			restorable.ranges = std::move(results.first);
+			restorable.keyRanges = std::move(results.second);
+			if (g_network->isSimulated()) {
+				// Sanity check key ranges
+				state std::map<std::string, KeyRange>::iterator rit;
+				for (rit = restorable.keyRanges.begin(); rit != restorable.keyRanges.end(); rit++) {
+					auto it = std::find_if(restorable.ranges.begin(), restorable.ranges.end(),
+					                       [file = rit->first](const RangeFile f) { return f.fileName == file; });
+					ASSERT(it != restorable.ranges.end());
+					KeyRange result = wait(bc->getSnapshotFileKeyRange(*it));
+					ASSERT(rit->second.begin <= result.begin && rit->second.end >= result.end);
+				}
+			}

 			// No logs needed if there is a complete key space snapshot at the target version.
 			if (snapshot.get().beginVersion == snapshot.get().endVersion &&
 			    snapshot.get().endVersion == targetVersion) {
+				restorable.continuousBeginVersion = restorable.continuousEndVersion = invalidVersion;
 				return Optional<RestorableFileSet>(restorable);
 			}

@ -1335,6 +1412,8 @@ public:
 				// sort by version order again for continuous analysis
 				std::sort(restorable.logs.begin(), restorable.logs.end());
 				if (isPartitionedLogsContinuous(restorable.logs, snapshot.get().beginVersion, targetVersion)) {
+					restorable.continuousBeginVersion = snapshot.get().beginVersion;
+					restorable.continuousEndVersion = targetVersion + 1; // not inclusive
 					return Optional<RestorableFileSet>(restorable);
 				}
 				return Optional<RestorableFileSet>();
@ -1348,6 +1427,8 @@ public:
 				Version end = logs.begin()->endVersion;
 				computeRestoreEndVersion(logs, &restorable.logs, &end, targetVersion);
 				if (end >= targetVersion) {
+					restorable.continuousBeginVersion = logs.begin()->beginVersion;
+					restorable.continuousEndVersion = end;
 					return Optional<RestorableFileSet>(restorable);
 				}
 			}
@ -2070,6 +2151,7 @@ ACTOR Future<Void> testBackupContainer(std::string url) {
 	state std::vector<Future<Void>> writes;
 	state std::map<Version, std::vector<std::string>> snapshots;
 	state std::map<Version, int64_t> snapshotSizes;
+	state std::map<Version, std::vector<std::pair<Key, Key>>> snapshotBeginEndKeys;
 	state int nRangeFiles = 0;
 	state std::map<Version, std::string> logs;
 	state Version v = deterministicRandom()->randomInt64(0, std::numeric_limits<Version>::max() / 2);
@ -2084,6 +2166,7 @@ ACTOR Future<Void> testBackupContainer(std::string url) {
 		while(kvfiles > 0) {
 			if(snapshots.empty()) {
 				snapshots[v] = {};
+				snapshotBeginEndKeys[v] = {};
 				snapshotSizes[v] = 0;
 				if(deterministicRandom()->coinflip()) {
 					v = nextVersion(v);
@ -2093,14 +2176,17 @@ ACTOR Future<Void> testBackupContainer(std::string url) {
 			++nRangeFiles;
 			v = nextVersion(v);
 			snapshots.rbegin()->second.push_back(range->getFileName());
+			snapshotBeginEndKeys.rbegin()->second.emplace_back(LiteralStringRef(""), LiteralStringRef(""));

 			int size = chooseFileSize(fileSizes);
 			snapshotSizes.rbegin()->second += size;
 			writes.push_back(writeAndVerifyFile(c, range, size));

 			if(deterministicRandom()->random01() < .2) {
-				writes.push_back(c->writeKeyspaceSnapshotFile(snapshots.rbegin()->second, snapshotSizes.rbegin()->second));
+				writes.push_back(c->writeKeyspaceSnapshotFile(
+				    snapshots.rbegin()->second, snapshotBeginEndKeys.rbegin()->second, snapshotSizes.rbegin()->second));
 				snapshots[v] = {};
+				snapshotBeginEndKeys[v] = {};
 				snapshotSizes[v] = 0;
 				break;
 			}
--- a/fdbclient/BackupContainer.h
+++ b/fdbclient/BackupContainer.h
@ -108,6 +108,12 @@ struct RangeFile {
 	std::string fileName;
 	int64_t fileSize;

+	RangeFile() : version(0), blockSize(0), fileSize(0) {} // zero the scalars instead of leaving them indeterminate
+	RangeFile(Version v, uint32_t bSize, std::string name, int64_t size)
+	  : version(v), blockSize(bSize), fileName(name), fileSize(size) {}
+	RangeFile(const RangeFile& f) // NOTE(review): member-wise copy; declaring it suppresses the implicit move ctor
+	  : version(f.version), blockSize(f.blockSize), fileName(f.fileName), fileSize(f.fileSize) {}
+
 	// Order by version, break ties with name
 	bool operator< (const RangeFile &rhs) const {
 		return version == rhs.version ? fileName < rhs.fileName : version < rhs.version;
@ -193,6 +199,14 @@ struct RestorableFileSet {
 	Version targetVersion;
 	std::vector<LogFile> logs;
 	std::vector<RangeFile> ranges;
+
+	// Range file's key ranges. Can be empty for backups generated before 6.3.
+	std::map<std::string, KeyRange> keyRanges;
+
+	// Mutation logs continuous range [begin, end). Both can be invalidVersion
+	// when the entire key space snapshot is at the target version.
+	Version continuousBeginVersion, continuousEndVersion;
+
 	KeyspaceSnapshotFile snapshot; // Info. for debug purposes
 };

@ -231,11 +245,17 @@ public:

 	// Write a KeyspaceSnapshotFile of range file names representing a full non overlapping
 	// snapshot of the key ranges this backup is targeting.
-	virtual Future<Void> writeKeyspaceSnapshotFile(std::vector<std::string> fileNames, int64_t totalBytes) = 0;
+	virtual Future<Void> writeKeyspaceSnapshotFile(const std::vector<std::string>& fileNames,
+	                                               const std::vector<std::pair<Key, Key>>& beginEndKeys,
+	                                               int64_t totalBytes) = 0;

 	// Open a file for read by name
 	virtual Future<Reference<IAsyncFile>> readFile(std::string name) = 0;

+	// Returns the key range covered by the given snapshot (range) file. This is an
+	// expensive function and should only be used in simulation for sanity checks.
+	virtual Future<KeyRange> getSnapshotFileKeyRange(const RangeFile& file) = 0;
+
 	struct ExpireProgress {
 		std::string step;
 		int total;
--- a/fdbclient/FileBackupAgent.actor.cpp
+++ b/fdbclient/FileBackupAgent.actor.cpp
@ -2257,6 +2257,7 @@ namespace fileBackup {
 			}

 			std::vector<std::string> files;
+			std::vector<std::pair<Key, Key>> beginEndKeys;
 			state Version maxVer = 0;
 			state Version minVer = std::numeric_limits<Version>::max();
 			state int64_t totalBytes = 0;
@ -2272,6 +2273,9 @@ namespace fileBackup {
 					// Add file to final file list
 					files.push_back(r.fileName);

+					// Add (beginKey, endKey) pairs to the list
+					beginEndKeys.emplace_back(i->second.begin, i->first);
+
 					// Update version range seen
 					if(r.version < minVer)
 						minVer = r.version;
@ -2293,7 +2297,7 @@ namespace fileBackup {
 			}

 			Params.endVersion().set(task, maxVer);
-			wait(bc->writeKeyspaceSnapshotFile(files, totalBytes));
+			wait(bc->writeKeyspaceSnapshotFile(files, beginEndKeys, totalBytes));

 			TraceEvent(SevInfo, "FileBackupWroteSnapshotManifest")
 				.detail("BackupUID", config.getUid())
--- a/fdbclient/JSONDoc.h
+++ b/fdbclient/JSONDoc.h
@ -193,7 +193,7 @@ struct JSONDoc {
 		return v.get_value<T>();
 	}

-	// Ensures that a an Object exists at path and returns a JSONDoc that writes to it.
+	// Ensures that an Object exists at path and returns a JSONDoc that writes to it.
 	JSONDoc subDoc(std::string path, bool split=true) {
 		json_spirit::mValue &v = create(path, split);
 		if(v.type() != json_spirit::obj_type)
--- a/fdbserver/BackupProgress.actor.cpp
+++ b/fdbserver/BackupProgress.actor.cpp
@ -83,6 +83,15 @@ std::map<std::tuple<LogEpoch, Version, int>, std::map<Tag, Version>> BackupProgr

 		auto progressIt = progress.lower_bound(epoch);
 		if (progressIt != progress.end() && progressIt->first == epoch) {
+			if (progressIt != progress.begin()) {
+				// Previous epoch is gone, consolidate the progress.
+				auto prev = std::prev(progressIt);
+				for (auto [tag, version] : prev->second) {
+					if (tags.count(tag) > 0) {
+						progressIt->second[tag] = std::max(version, progressIt->second[tag]);
+					}
+				}
+			}
 			updateTagVersions(&tagVersions, &tags, progressIt->second, info.epochEnd, adjustedBeginVersion, epoch);
 		} else {
 			auto rit = std::find_if(
--- a/fdbserver/BackupWorker.actor.cpp
+++ b/fdbserver/BackupWorker.actor.cpp
@ -68,13 +68,15 @@ struct BackupData {
 	const UID myId;
 	const Tag tag; // LogRouter tag for this worker, i.e., (-2, i)
 	const int totalTags; // Total log router tags
-	const Version startVersion;
+	// Backup request's commit version. Mutations are logged at some version after this.
+	const Version startVersion; // This worker's start version
 	const Optional<Version> endVersion; // old epoch's end version (inclusive), or empty for current epoch
 	const LogEpoch recruitedEpoch; // current epoch whose tLogs are receiving mutations
 	const LogEpoch backupEpoch; // the epoch workers should pull mutations
 	LogEpoch oldestBackupEpoch = 0; // oldest epoch that still has data on tLogs for backup to pull
 	Version minKnownCommittedVersion;
-	Version savedVersion;
+	Version savedVersion; // Largest version saved to blob storage
+	Version popVersion; // Largest version popped in NOOP mode, can be larger than savedVersion.
 	AsyncVar<Reference<ILogSystem>> logSystem;
 	Database cx;
 	std::vector<VersionedMessage> messages;
@ -225,7 +227,7 @@ struct BackupData {
 	explicit BackupData(UID id, Reference<AsyncVar<ServerDBInfo>> db, const InitializeBackupRequest& req)
 	  : myId(id), tag(req.routerTag), totalTags(req.totalTags), startVersion(req.startVersion),
 	    endVersion(req.endVersion), recruitedEpoch(req.recruitedEpoch), backupEpoch(req.backupEpoch),
-	    minKnownCommittedVersion(invalidVersion), savedVersion(req.startVersion - 1),
+	    minKnownCommittedVersion(invalidVersion), savedVersion(req.startVersion - 1), popVersion(req.startVersion - 1),
 	    cc("BackupWorker", myId.toString()), pulledVersion(0), paused(false) {
 		cx = openDBOnServer(db, TaskPriority::DefaultEndpoint, true, true);

@ -291,7 +293,7 @@ struct BackupData {
 		}
 		ASSERT_WE_THINK(backupEpoch == oldestBackupEpoch);
 		const Tag popTag = logSystem.get()->getPseudoPopTag(tag, ProcessClass::BackupClass);
-		logSystem.get()->pop(savedVersion, popTag);
+		logSystem.get()->pop(std::max(popVersion, savedVersion), popTag);
 	}

 	void stop() {
@ -326,11 +328,15 @@ struct BackupData {
 		}

 		bool modified = false;
+		bool minVersionChanged = false;
+		Version minVersion = std::numeric_limits<Version>::max();
 		for (const auto [uid, version] : uidVersions) {
 			auto it = backups.find(uid);
 			if (it == backups.end()) {
 				modified = true;
 				backups.emplace(uid, BackupData::PerBackupInfo(this, uid, version));
+				minVersion = std::min(minVersion, version);
+				minVersionChanged = true;
 			} else {
 				stopList.erase(uid);
 			}
@ -342,6 +348,14 @@ struct BackupData {
 			it->second.stop();
 			modified = true;
 		}
+		if (minVersionChanged && backupEpoch < recruitedEpoch && savedVersion + 1 == startVersion) {
+			// Advance savedVersion to minimize version ranges in case backupEpoch's
+			// progress is not saved. Master may set a very low startVersion that
+			// is already popped. Advancing the version is safe because these
+			// versions are not popped -- if they were popped, their progress would
+			// already be recorded and Master would use a higher version than minVersion.
+			savedVersion = std::max(minVersion, savedVersion);
+		}
 		if (modified) changedTrigger.trigger();
 	}

@ -390,10 +404,10 @@ struct BackupData {
 	Future<Version> getMinKnownCommittedVersion() { return _getMinKnownCommittedVersion(this); }
 };

-// Monitors "backupStartedKey". If "started" is true, wait until the key is set;
+// Monitors "backupStartedKey". If "present" is true, wait until the key is set;
 // otherwise, wait until the key is cleared. If "watch" is false, do not perform
 // the wait for key set/clear events. Returns if key present.
-ACTOR Future<bool> monitorBackupStartedKeyChanges(BackupData* self, bool started, bool watch) {
+ACTOR Future<bool> monitorBackupStartedKeyChanges(BackupData* self, bool present, bool watch) {
 	loop {
 		state ReadYourWritesTransaction tr(self->cx);

@ -418,13 +432,13 @@ ACTOR Future<bool> monitorBackupStartedKeyChanges(BackupData* self, bool started
 					}
 					self->exitEarly = shouldExit;
 					self->onBackupChanges(uidVersions);
-					if (started || !watch) return true;
+					if (present || !watch) return true;
 				} else {
 					TraceEvent("BackupWorkerEmptyStartKey", self->myId);
 					self->onBackupChanges(uidVersions);

 					self->exitEarly = shouldExit;
-					if (!started || !watch) {
+					if (!present || !watch) {
 						return false;
 					}
 				}
@ -650,8 +664,13 @@ ACTOR Future<Void> saveMutationsToFile(BackupData* self, Version popVersion, int
 		activeUids.push_back(it->first);
 		self->insertRanges(keyRangeMap, it->second.ranges.get(), index);
 		if (it->second.lastSavedVersion == invalidVersion) {
-			it->second.lastSavedVersion =
-			    self->savedVersion > self->startVersion ? self->savedVersion : self->startVersion;
+			if (it->second.startVersion > self->startVersion && !self->messages.empty()) {
+				// True-up first mutation log's begin version
+				it->second.lastSavedVersion = self->messages[0].getVersion();
+			} else {
+				it->second.lastSavedVersion =
+				    std::max(self->popVersion, std::max(self->savedVersion, self->startVersion));
+			}
 		}
 		logFileFutures.push_back(it->second.container.get().get()->writeTaggedLogFile(
 		    it->second.lastSavedVersion, popVersion + 1, blockSize, self->tag.id, self->totalTags));
@ -762,6 +781,7 @@ ACTOR Future<Void> uploadData(BackupData* self) {
 		if (((numMsg > 0 || popVersion > lastPopVersion) && self->pulling) || self->pullFinished()) {
 			TraceEvent("BackupWorkerSave", self->myId)
 			    .detail("Version", popVersion)
+			    .detail("SavedVersion", self->savedVersion)
 			    .detail("MsgQ", self->messages.size());
 			// save an empty file for old epochs so that log file versions are continuous
 			wait(saveMutationsToFile(self, popVersion, numMsg));
@ -769,9 +789,11 @@ ACTOR Future<Void> uploadData(BackupData* self) {
 		}

 		// If transition into NOOP mode, should clear messages
-		if (!self->pulling) self->messages.clear();
+		if (!self->pulling) {
+			self->messages.clear();
+		}

-		if (popVersion > self->savedVersion) {
+		if (popVersion > self->savedVersion && popVersion > self->popVersion) {
 			wait(saveProgress(self, popVersion));
 			TraceEvent("BackupWorkerSavedProgress", self->myId)
 			    .detail("Tag", self->tag.toString())
@ -872,10 +894,13 @@ ACTOR Future<Void> monitorBackupKeyOrPullData(BackupData* self, bool keyPresent)
 				when(wait(success(present))) { break; }
 				when(wait(success(committedVersion) || delay(SERVER_KNOBS->BACKUP_NOOP_POP_DELAY, self->cx->taskID))) {
 					if (committedVersion.isReady()) {
-						self->savedVersion = std::max(committedVersion.get(), self->savedVersion);
+						self->popVersion =
+						    std::max(self->popVersion, std::max(committedVersion.get(), self->savedVersion));
 						self->minKnownCommittedVersion =
 						    std::max(committedVersion.get(), self->minKnownCommittedVersion);
-						TraceEvent("BackupWorkerNoopPop", self->myId).detail("SavedVersion", self->savedVersion);
+						TraceEvent("BackupWorkerNoopPop", self->myId)
+						    .detail("SavedVersion", self->savedVersion)
+						    .detail("PopVersion", self->popVersion);
 						self->pop(); // Pop while the worker is in this NOOP state.
 						committedVersion = Never();
 					} else {
@ -884,6 +909,7 @@ ACTOR Future<Void> monitorBackupKeyOrPullData(BackupData* self, bool keyPresent)
 				}
 			}
 		}
+		ASSERT(!keyPresent == present.get());
 		keyPresent = !keyPresent;
 	}
 }
--- a/fdbserver/RestoreCommon.actor.cpp
+++ b/fdbserver/RestoreCommon.actor.cpp
@ -297,63 +297,6 @@ std::string RestoreConfigFR::toString() {
 // parallelFileRestore is copied from FileBackupAgent.actor.cpp for the same reason as RestoreConfigFR is copied
 namespace parallelFileRestore {

-ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<IAsyncFile> file, int64_t offset,
-                                                                      int len) {
-	state Standalone<StringRef> buf = makeString(len);
-	int rLen = wait(file->read(mutateString(buf), len, offset));
-	if (rLen != len) throw restore_bad_read();
-
-	Standalone<VectorRef<KeyValueRef>> results({}, buf.arena());
-	state StringRefReader reader(buf, restore_corrupted_data());
-
-	try {
-		// Read header, currently only decoding version 1001
-		if (reader.consume<int32_t>() != 1001) throw restore_unsupported_file_version();
-
-		// Read begin key, if this fails then block was invalid.
-		uint32_t kLen = reader.consumeNetworkUInt32();
-		const uint8_t* k = reader.consume(kLen);
-		results.push_back(results.arena(), KeyValueRef(KeyRef(k, kLen), ValueRef()));
-
-		// Read kv pairs and end key
-		while (1) {
-			// Read a key.
-			kLen = reader.consumeNetworkUInt32();
-			k = reader.consume(kLen);
-
-			// If eof reached or first value len byte is 0xFF then a valid block end was reached.
-			if (reader.eof() || *reader.rptr == 0xFF) {
-				results.push_back(results.arena(), KeyValueRef(KeyRef(k, kLen), ValueRef()));
-				break;
-			}
-
-			// Read a value, which must exist or the block is invalid
-			uint32_t vLen = reader.consumeNetworkUInt32();
-			const uint8_t* v = reader.consume(vLen);
-			results.push_back(results.arena(), KeyValueRef(KeyRef(k, kLen), ValueRef(v, vLen)));
-
-			// If eof reached or first byte of next key len is 0xFF then a valid block end was reached.
-			if (reader.eof() || *reader.rptr == 0xFF) break;
-		}
-
-		// Make sure any remaining bytes in the block are 0xFF
-		for (auto b : reader.remainder())
-			if (b != 0xFF) throw restore_corrupted_data_padding();
-
-		return results;
-
-	} catch (Error& e) {
-		TraceEvent(SevError, "FileRestoreCorruptRangeFileBlock")
-		    .error(e)
-		    .detail("Filename", file->getFilename())
-		    .detail("BlockOffset", offset)
-		    .detail("BlockLen", len)
-		    .detail("ErrorRelativeOffset", reader.rptr - buf.begin())
-		    .detail("ErrorAbsoluteOffset", reader.rptr - buf.begin() + offset);
-		throw;
-	}
-}
-
 ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeLogFileBlock(Reference<IAsyncFile> file, int64_t offset,
                                                                    int len) {
 	state Standalone<StringRef> buf = makeString(len);
--- a/fdbserver/RestoreCommon.actor.h
+++ b/fdbserver/RestoreCommon.actor.h
@ -248,8 +248,6 @@ struct RestoreFileFR {
 };

 namespace parallelFileRestore {
-ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeRangeFileBlock(Reference<IAsyncFile> file, int64_t offset,
-                                                                      int len);
 ACTOR Future<Standalone<VectorRef<KeyValueRef>>> decodeLogFileBlock(Reference<IAsyncFile> file, int64_t offset,
                                                                    int len);
 } // namespace parallelFileRestore
--- a/fdbserver/RestoreLoader.actor.cpp
+++ b/fdbserver/RestoreLoader.actor.cpp
@ -23,6 +23,7 @@

 #include "flow/UnitTest.h"
 #include "fdbclient/BackupContainer.h"
+#include "fdbclient/BackupAgent.actor.h"
 #include "fdbserver/RestoreLoader.actor.h"
 #include "fdbserver/RestoreRoleCommon.actor.h"

@ -817,11 +818,18 @@ ACTOR static Future<Void> _parseRangeFileToMutationsOnLoader(

 	// The set of key value version is rangeFile.version. the key-value set in the same range file has the same version
 	Reference<IAsyncFile> inFile = wait(bc->readFile(asset.filename));
-	Standalone<VectorRef<KeyValueRef>> blockData =
-	    wait(parallelFileRestore::decodeRangeFileBlock(inFile, asset.offset, asset.len));
-	TraceEvent("FastRestore")
-	    .detail("DecodedRangeFile", asset.filename)
-	    .detail("DataSize", blockData.contents().size());
+	state Standalone<VectorRef<KeyValueRef>> blockData; // Standalone so the decoded data is kept alive after kvs is gone
+	try {
+		Standalone<VectorRef<KeyValueRef>> kvs =
+		    wait(fileBackup::decodeRangeFileBlock(inFile, asset.offset, asset.len));
+		TraceEvent("FastRestore")
+		    .detail("DecodedRangeFile", asset.filename)
+		    .detail("DataSize", kvs.contents().size());
+		blockData = kvs; // Standalone-to-Standalone assignment, not a bare pointer/length copy
+	} catch (Error& e) {
+		TraceEvent(SevError, "FileRestoreCorruptRangeFileBlock").error(e); // log here, then let the caller handle it
+		throw;
+	}

 	// First and last key are the range for this file
 	KeyRange fileRange = KeyRangeRef(blockData.front().key, blockData.back().key);
--- a/fdbserver/RestoreMaster.actor.cpp
+++ b/fdbserver/RestoreMaster.actor.cpp
@ -717,22 +717,10 @@ ACTOR static Future<Version> collectBackupFiles(Reference<IBackupContainer> bc,
 ACTOR static Future<Void> insertRangeVersion(KeyRangeMap<Version>* pRangeVersions, RestoreFileFR* file,
                                             Reference<IBackupContainer> bc) {
 	TraceEvent("FastRestoreMasterDecodeRangeVersion").detail("File", file->toString());
-	state Reference<IAsyncFile> inFile = wait(bc->readFile(file->fileName));
-	state bool beginKeySet = false;
-	state Key beginKey;
-	state Key endKey;
-	state int64_t j = 0;
-	for (; j < file->fileSize; j += file->blockSize) {
-		int64_t len = std::min<int64_t>(file->blockSize, file->fileSize - j);
-		Standalone<VectorRef<KeyValueRef>> blockData = wait(parallelFileRestore::decodeRangeFileBlock(inFile, j, len));
-		if (!beginKeySet) {
-			beginKey = blockData.front().key;
-		}
-		endKey = blockData.back().key;
-	}
+	RangeFile rangeFile(file->version, file->blockSize, file->fileName, file->fileSize);

 	// First and last key are the range for this file: endKey is exclusive
-	KeyRange fileRange = KeyRangeRef(beginKey.contents(), endKey.contents());
+	KeyRange fileRange = wait(bc->getSnapshotFileKeyRange(rangeFile));
 	TraceEvent("FastRestoreMasterInsertRangeVersion")
 	    .detail("DecodedRangeFile", file->fileName)
 	    .detail("KeyRange", fileRange)
--- a/fdbserver/RestoreUtil.h
+++ b/fdbserver/RestoreUtil.h
@ -35,8 +35,8 @@
 #include <cstdint>
 #include <cstdarg>

-//#define SevFRMutationInfo SevVerbose
-#define SevFRMutationInfo SevInfo
+#define SevFRMutationInfo SevVerbose
+//#define SevFRMutationInfo SevInfo

 struct VersionedMutation {
 	MutationRef mutation;
--- a/fdbserver/TagPartitionedLogSystem.actor.cpp
+++ b/fdbserver/TagPartitionedLogSystem.actor.cpp
@ -188,6 +188,7 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
 	bool remoteLogsWrittenToCoreState;
 	bool hasRemoteServers;
 	AsyncTrigger backupWorkerChanged;
+	std::set<UID> removedBackupWorkers; // Workers that are removed before setting them.

 	Optional<Version> recoverAt;
 	Optional<Version> recoveredAt;
@ -1399,6 +1400,10 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
 		LogEpoch logsetEpoch = this->epoch;
 		oldestBackupEpoch = this->epoch;
 		for (const auto& reply : replies) {
+			// Worker was already removed: drop its record and skip it. erase() returns the number of elements erased.
+			if (removedBackupWorkers.erase(reply.interf.id()) > 0) {
+				continue;
+			}
 			Reference<AsyncVar<OptionalInterface<BackupInterface>>> worker(new AsyncVar<OptionalInterface<BackupInterface>>(OptionalInterface<BackupInterface>(reply.interf)));
 			if (reply.backupEpoch != logsetEpoch) {
 				// find the logset from oldLogData
@ -1408,6 +1413,9 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
 				ASSERT(logset.isValid());
 			}
 			logset->backupWorkers.push_back(worker);
+			TraceEvent("AddBackupWorker", dbgid)
+			    .detail("Epoch", logsetEpoch)
+			    .detail("BackupWorkerID", reply.interf.id());
 		}
 		TraceEvent("SetOldestBackupEpoch", dbgid).detail("Epoch", oldestBackupEpoch);
 		backupWorkerChanged.trigger();
@ -1434,6 +1442,8 @@ struct TagPartitionedLogSystem : ILogSystem, ReferenceCounted<TagPartitionedLogS
 				}
 			}
 			backupWorkerChanged.trigger();
+		} else {
+			removedBackupWorkers.insert(req.workerUID);
 		}

 		TraceEvent("RemoveBackupWorker", dbgid)
--- a/fdbserver/workloads/BackupAndParallelRestoreCorrectness.actor.cpp
+++ b/fdbserver/workloads/BackupAndParallelRestoreCorrectness.actor.cpp
@ -450,7 +450,8 @@ struct BackupAndParallelRestoreCorrectnessWorkload : TestWorkload {
 						targetVersion = desc.minRestorableVersion.get();
 					} else if (deterministicRandom()->random01() < 0.1) {
 						targetVersion = desc.maxRestorableVersion.get();
-					} else if (deterministicRandom()->random01() < 0.5) {
+					} else if (deterministicRandom()->random01() < 0.5 &&
+					           desc.minRestorableVersion.get() < desc.contiguousLogEnd.get()) {
 						// The assertion may fail because minRestorableVersion may be decided by snapshot version.
 						// ASSERT_WE_THINK(desc.minRestorableVersion.get() <= desc.contiguousLogEnd.get());
 						// This assertion can fail when contiguousLogEnd < maxRestorableVersion and
--- a/flow/actorcompiler/ActorCompiler.cs
+++ b/flow/actorcompiler/ActorCompiler.cs
@ -953,7 +953,15 @@ namespace actorcompiler
                    // if it has side effects
                    cx.target.WriteLine("if (!{0}->SAV<{1}>::futures) {{ (void)({2}); this->~{3}(); {0}->destroy(); return 0; }}", This, actor.returnType, stmt.expression, stateClassName);
                    // Build the return value directly in SAV<T>::value_storage
-                    cx.target.WriteLine("new (&{0}->SAV< {1} >::value()) {1}({2});", This, actor.returnType, stmt.expression);
+                    // If the expression is exactly the name of a state variable, std::move() it
+                    if (state.Exists(s => s.name == stmt.expression))
+                    {
+                        cx.target.WriteLine("new (&{0}->SAV< {1} >::value()) {1}(std::move({2})); // state_var_RVO", This, actor.returnType, stmt.expression);
+                    }
+                    else
+                    {
+                        cx.target.WriteLine("new (&{0}->SAV< {1} >::value()) {1}({2});", This, actor.returnType, stmt.expression);
+                    }
                    // Destruct state
                    cx.target.WriteLine("this->~{0}();", stateClassName);
                    // Tell SAV<T> to return the value we already constructed in value_storage
--- a/flow/flow.h
+++ b/flow/flow.h
@ -687,6 +687,11 @@ public:
 	{
 		sav->send(presentValue);
 	}
+	Future(T&& presentValue)
+		: sav(new SAV<T>(1, 0))
+	{
+		sav->send(std::move(presentValue));
+	}
 	Future(Never)
 		: sav(new SAV<T>(1, 0))
 	{