Merge branch 'master' of github.com:apple/foundationdb into backup

2017-09-06 10:01:55 -07:00 · 2017-09-06 10:01:55 -07:00 · fe208d6adf
parent 9f8056754a eceadc6281
commit fe208d6adf
38 changed files with 701 additions and 892 deletions
--- a/bindings/c/test/performance_test.c
+++ b/bindings/c/test/performance_test.c
@ -500,8 +500,8 @@ struct RunResult getSingleKeyRange(struct ResultSet *rs, FDBTransaction *tr) {
 		FDBFuture *f = fdb_transaction_get_range(tr,
 			keys[key], keySize, 1, 0,
 			keys[key + 1], keySize, 1, 0,
-			0, 0,
-			FDB_STREAMING_MODE_WANT_ALL, 1, 0, 0);
+			2, 0,
+			FDB_STREAMING_MODE_EXACT, 1, 0, 0);

 		e = maybeLogError(fdb_future_block_until_ready(f), "waiting for single key range", rs);
 		if(e) {
@ -516,7 +516,7 @@ struct RunResult getSingleKeyRange(struct ResultSet *rs, FDBTransaction *tr) {
 		}

 		if(outCount != 1) {
-			logError(4100, "non-1 number of keys returned in single key range read", rs);
+			logError(4100, "more than one key returned in single key range read", rs);
 			fdb_future_destroy(f);
 			return RES(0, 4100);
 		}
--- a/bindings/flow/fdb_flow.actor.cpp
+++ b/bindings/flow/fdb_flow.actor.cpp
@ -41,7 +41,7 @@ ACTOR Future<Void> _test() {
 	// tr->setVersion(1);

 	Version ver = wait( tr->getReadVersion() );
-	printf("%ld\n", ver);
+	printf("%lld\n", ver);

 	state std::vector< Future<Version> > versions;

--- a/bindings/java/src-completable/test/com/apple/cie/foundationdb/test/AsListTest.java
+++ b/bindings/java/src-completable/test/com/apple/cie/foundationdb/test/AsListTest.java
@ -1,88 +0,0 @@
-/*
- * AsListTest.java
- *
- * This source file is part of the FoundationDB open source project
- *
- * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.apple.cie.foundationdb.test;
-
-import com.apple.cie.foundationdb.Database;
-import com.apple.cie.foundationdb.FDB;
-import com.apple.cie.foundationdb.LocalityUtil;
-import com.apple.cie.foundationdb.Transaction;
-import com.apple.cie.foundationdb.async.AsyncUtil;
-
-import java.util.function.Function;
-import java.util.concurrent.CompletableFuture;
-
-public class AsListTest {
-	/**
-	 * When the database contains keys a, b, c, d, e -- this should return 5 items,
-	 * a bug made the addition of the clear into the result returning 0 items.
-	 */
-	public static void main(String[] args) {
-		FDB fdb = FDB.selectAPIVersion(500);
-		Database database = fdb.open("T:\\circus\\tags\\RebarCluster-bbc\\cluster_id.txt");
-		database.options().setLocationCacheSize(42);
-		Transaction tr = database.createTransaction();
-		//tr.clear("g".getBytes());
-		/*tr.clear("bbb".getBytes());
-		AsyncIterable<KeyValue> query = tr.getRange(
-				KeySelector.firstGreaterOrEqual("a".getBytes()),
-				KeySelector.firstGreaterOrEqual("e".getBytes()),
-				Integer.MAX_VALUE);
-		//List<KeyValue> list = query.asList().get();
-		//System.out.println("List size: " + list.size());
-*/
-		String[] keyAddresses = LocalityUtil.getAddressesForKey(tr, "a".getBytes()).join();
-		for(String s : keyAddresses) {
-			System.out.println(" @ " + s);
-		}
-
-		@SuppressWarnings("unused")
-		CompletableFuture<Integer> i = AsyncUtil.applySafely(new Function<Exception, CompletableFuture<Integer>>() {
-			@Override
-			public CompletableFuture<Integer> apply(Exception o) {
-				return CompletableFuture.completedFuture(3);
-			}
-		}, new RuntimeException());
-
-		CompletableFuture<Integer> f = null;
-
-		@SuppressWarnings({ "unused", "null" })
-		CompletableFuture<String> g = f.thenComposeAsync(new Function<Integer, CompletableFuture<String>>() {
-			@Override
-			public CompletableFuture<String> apply(Integer o) {
-				return CompletableFuture.completedFuture(o.toString());
-			}
-		});
-
-		@SuppressWarnings({ "unused", "null" })
-		CompletableFuture<String> g2 = f.thenComposeAsync(new Function<Integer, CompletableFuture<String>>() {
-			@Override
-			public CompletableFuture<String> apply(Integer o) {
-				return CompletableFuture.completedFuture(o.toString());
-			}
-		}).exceptionally(new Function<Throwable, String>() {
-			@Override
-			public String apply(Throwable o) {
-				// TODO Auto-generated method stub
-				return null;
-			}
-		});
-	}
-}
--- a/bindings/java/src-completable/test/com/apple/cie/foundationdb/test/OSTest.java
+++ b/bindings/java/src-completable/test/com/apple/cie/foundationdb/test/OSTest.java
@ -1,38 +0,0 @@
-/*
- * OSTest.java
- *
- * This source file is part of the FoundationDB open source project
- *
- * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.apple.cie.foundationdb.test;
-
-import java.io.InputStream;
-
-public class OSTest {
-
-	/**
-	 * @param args
-	 */
-	public static void main(String[] args) {
-		System.out.println("OS name: " + System.getProperty("os.name"));
-		System.out.println("OS arch: " + System.getProperty("os.arch"));
-
-		InputStream stream = OSTest.class.getResourceAsStream("/lib/linux/amd64/libfdb_java.so");
-		System.out.println("Stream: " + stream);
-	}
-
-}
--- a/bindings/java/src-completable/test/com/apple/cie/foundationdb/test/PerformanceTester.java
+++ b/bindings/java/src-completable/test/com/apple/cie/foundationdb/test/PerformanceTester.java
@ -352,7 +352,7 @@ public class PerformanceTester extends AbstractTester {
            long start = System.nanoTime();
            for (int i = 0; i < count; i++) {
                int keyIndex = randomKeyIndex();
-                tr.getRange(key(keyIndex), key(keyIndex + 1)).asList().join();
+                tr.getRange(key(keyIndex), key(keyIndex + 1), 2).asList().join();
            }
            long end = System.nanoTime();

--- a/bindings/java/src-completable/test/com/apple/cie/foundationdb/test/TestApp.java
+++ b/bindings/java/src-completable/test/com/apple/cie/foundationdb/test/TestApp.java
@ -1,91 +0,0 @@
-/*
- * TestApp.java
- *
- * This source file is part of the FoundationDB open source project
- *
- * Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
- * 
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- * 
- *     http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package com.apple.cie.foundationdb.test;
-
-import java.util.concurrent.CompletableFuture;
-
-import com.apple.cie.foundationdb.Cluster;
-import com.apple.cie.foundationdb.Database;
-import com.apple.cie.foundationdb.FDB;
-import com.apple.cie.foundationdb.Transaction;
-
-public class TestApp {
-
-	public static void main(String[] args) throws Exception {
-		try {
-			Cluster cluster = FDB.selectAPIVersion(500).createCluster();
-			System.out.println("I now have the cluster");
-			Database db = cluster.openDatabase();
-
-			Transaction tr = db.createTransaction();
-			System.out.println("TR: " + tr);
-
-			byte[] appleValue = tr.get("apple".getBytes()).get();
-			System.out.println("Apple: " + (appleValue == null ? null : new String(appleValue)));
-
-			tr.set("apple".getBytes(), "crunchy".getBytes());
-			System.out.println("Attempting to commit apple/crunchy...");
-			tr.commit().get(); // FIXME: this is not an ok use of the API
-			tr = db.createTransaction();
-
-			long topTime = 0, getTime = 0, bottomTime = 0;
-
-			for(int i = 0; i < 1000; i++) {
-				long a = System.currentTimeMillis();
-
-				final byte[] key = ("apple" + i).getBytes();
-				tr = db.createTransaction();
-				CompletableFuture<byte[]> future = tr.get(key);
-
-				long b = System.currentTimeMillis();
-
-				future.get();
-
-				long c = System.currentTimeMillis();
-
-				tr.set(key, ("Apple" + i).getBytes());
-				final CompletableFuture<Void> commit = tr.commit();
-
-				long d = System.currentTimeMillis();
-
-				commit.whenCompleteAsync((v, error) -> {
-					if(error != null) {
-						error.printStackTrace();
-					}
-				});
-
-				topTime += b - a;
-				getTime += c - b;
-				bottomTime += d - c;
-			}
-
-			System.out.println(" Top:    " + topTime);
-			System.out.println(" Get:    " + getTime);
-			System.out.println(" Bottom: " + bottomTime);
-
-			tr.dispose();
-			db.dispose();
-			cluster.dispose();
-		} catch(Throwable t) {
-			t.printStackTrace();
-		}
-	}
-}
--- a/design/tuple.md
+++ b/design/tuple.md
@ -0,0 +1,196 @@
+# FDB Tuple layer typecodes
+
+This document is intended to be the system of record for the allocation of typecodes in the Tuple layer. The source code isn’t good enough because a typecode might be added to one language (or by a customer) before another.
+
+Status: Standard means that all of our language bindings implement this typecode
+Status: Reserved means that this typecode is not yet used in our standard language bindings, but may be in use by third party bindings or specific applications
+Status: Deprecated means that a previous layer used this type, but issues with that type code have led us to mark this type code as not to be used.
+
+
+### **Null Value**
+
+Typecode: 0x00  
+Length: 0 bytes  
+Status: Standard
+
+### **Byte String**
+
+Typecode: 0x01  
+Length: Variable (terminated by `[\x00]![\xff]`)  
+Encoding: `b'\x01' + value.replace(b'\x00', b'\x00\xFF') + b'\x00'`  
+Test case: `pack("foo\x00bar") == b'\x01foo\x00\xffbar\x00'`  
+Status: Standard
+
+In other words, byte strings are null terminated with null values occurring in the string escaped in an order-preserving way.
+
+### **Unicode String**
+
+Typecode: 0x02  
+Length: Variable (terminated by `[\x00]![\xff]`)  
+Encoding: `b'\x02' + value.encode('utf-8').replace(b'\x00', b'\x00\xFF') + b'\x00'`  
+Test case: `pack( u"F\u00d4O\u0000bar" ) == b'\x02F\xc3\x94O\x00\xffbar\x00'`  
+Status: Standard
+
+This is the same way that byte strings are encoded, but first, the unicode string is encoded in UTF-8.
+
+### **(DEPRECATED) Nested Tuple**
+
+Typecodes: 0x03-0x04  
+Length: Variable (terminated by 0x04 type code)  
+Status: Deprecated  
+
+This encoding was used by a few layers. However, it had ordering problems when one tuple was a prefix of another and the type of the first element in the longer tuple was either null or a byte string. For an example, consider the empty tuple and the tuple containing only null. In the old scheme, the empty tuple would be encoded as `\x03\x04` while the tuple containing only null would be encoded as `\x03\x00\x04`, so the second tuple would sort first based on their bytes, which is incorrect semantically.
+
+### **Nested Tuple**
+
+Typecodes: 0x05  
+Length: Variable (terminated by `[\x00]![\xff]` at beginning of nested element)  
+Encoding: `b'\x05' + ''.join(map(lambda x: b'\x00\xff' if x is None else pack(x), value)) + b'\x00'`  
+Test case: `pack( ("foo\x00bar", None, ()) ) == b'\x05\x01foo\x00\xffbar\x00\x00\xff\x05\x00\x00'`  
+Status: Standard
+
+The list is ended with a 0x00 byte. Nulls within the tuple are encoded as `\x00\xff`. There is no other null escaping. In particular, 0x00 bytes that are within the nested types can be left as-is as they are passed over when decoding the interior types. To show how this fixes the bug in the previous version of nested tuples, the empty tuple is now encoded as `\x05\x00` while the tuple containing only null is encoded as `\x05\x00\xff\x00`, so the first tuple will sort first.
+
+### **Negative arbitrary-precision Integer**
+
+Typecodes: 0x0a, 0x0b  
+Encoding: Not defined yet  
+Status: Reserved; 0x0b used in Python and Java
+
+These typecodes are reserved for encoding integers larger than 8 bytes. Presumably the type code would be followed by some encoding of the length, followed by the big endian one’s complement number. Reserving two typecodes for each of positive and negative numbers is probably overkill, but until there’s a design in place we might as well not use them. In the Python and Java implementations, 0x0b stores negative numbers which are expressed with between 9 and 255 bytes. The first byte following the type code (0x0b) is a single byte expressing the number of bytes in the integer (with its bits flipped to preserve order), followed by that number of bytes representing the number in big endian order in one's complement.
+
+### **Integer**
+
+Typecodes: 0x0c - 0x1c  
+&nbsp;0x0c is an 8 byte negative number  
+&nbsp;0x13 is a 1 byte negative number  
+&nbsp;0x14 is a zero  
+&nbsp;0x15 is a 1 byte positive number  
+&nbsp;0x1c is an 8 byte positive number  
+Length: Depends on typecode (0-8 bytes)  
+Encoding: positive numbers are big endian  
+ negative numbers are big endian one’s complement (so -1 is 0x13 0xfe)  
+Test case: `pack( -5551212 ) == b'\x11\xabK\x93'`  
+Status: Standard
+
+There is some variation in the ability of language bindings to encode and decode values at the outside of the possible range, because of different native representations of integers. 
+
+### **Positive arbitrary-precision Integer**
+
+Typecodes: 0x1d, 0x1e  
+Encoding: Not defined yet  
+Status: Reserved; 0x1d used in Python and Java
+
+These typecodes are reserved for encoding integers larger than 8 bytes. Presumably the type code would be followed by some encoding of the length, followed by the big endian one’s complement number. Reserving two typecodes for each of positive and negative numbers is probably overkill, but until there’s a design in place we might as well not use them. In the Python and Java implementations, 0x1d stores positive numbers which are expressed with between 9 and 255 bytes. The first byte following the type code (0x1d) is a single byte expressing the number of bytes in the integer, followed by that number of bytes representing the number in big endian order.
+
+### **IEEE Binary Floating Point**
+
+Typecodes:   
+&nbsp;0x20 - float (32 bits)  
+&nbsp;0x21 - double (64 bits)  
+&nbsp;0x22 - long double (80 bits)  
+Length: 4 - 10 bytes  
+Test case: `pack( -42f ) == b'\x20=\xd7\xff\xff'`  
+Encoding: Big-endian IEEE binary representation, followed by the following transformation:  
+```python
+ if ord(rep[0])&0x80: # Check sign bit
+    # Flip all bits, this is easier in most other languages!
+    return "".join( chr(0xff^ord(r)) for r in rep )
+ else:
+    # Flip just the sign bit
+    return chr(0x80^ord(rep[0])) + rep[1:]
+```
+Status: Standard (float and double) ; Reserved (long double)
+
+The binary representation should not be assumed to be canonicalized (as to multiple representations of NaN, for example) by a reader. This order sorts all numbers in the following way:
+
+* All negative NaN values with order determined by mantissa bits (which are semantically meaningless)
+* Negative infinity
+* All real numbers in the standard order (except that -0.0 < 0.0)
+* Positive infinity
+* All positive NaN values with order determined by mantissa bits
+
+This should be equivalent to the standard IEEE total ordering.
+
+### **Arbitrary-precision Decimal**
+
+Typecodes: 0x23, 0x24  
+Length: Arbitrary  
+Encoding: Scale followed by arbitrary precision integer  
+Status: Reserved
+
+This encoding format has been used by layers. Note that this encoding makes almost no guarantees about ordering properties of tuple-encoded values and should thus generally be avoided.
+
+### **(DEPRECATED) True Value**
+
+Typecode: 0x25  
+Length: 0 bytes  
+Status: Deprecated
+
+### **False Value**
+
+Typecode: 0x26  
+Length: 0 bytes  
+Status: Standard
+
+### **True Value**
+
+Typecode: 0x27  
+Length: 0 bytes  
+Status: Standard
+
+Note that false will sort before true with the given encoding.
+
+### **RFC 4122 UUID**
+
+Typecode: 0x30  
+Length: 16 bytes  
+Encoding: Network byte order as defined in the rfc: [_http://www.ietf.org/rfc/rfc4122.txt_](http://www.ietf.org/rfc/rfc4122.txt)  
+Status: Standard
+
+This is equivalent to the unsigned byte ordering of the UUID bytes in big-endian order.
+
+### **64 bit identifier**
+
+Typecode: 0x31  
+Length: 8 bytes  
+Encoding: Big endian unsigned 8-byte integer (typically random or perhaps semi-sequential)  
+Status: Reserved
+
+There’s definitely some question of whether this deserves to be separated from a plain old 64 bit integer, but a separate type was desired in one of the third-party bindings. This type has not been ported over to the first-party bindings.
+
+### **80 Bit versionstamp**
+
+Typecode: 0x32  
+Length: 10 bytes  
+Encoding: Big endian 10-byte integer. First/high 8 bytes are a database version, next two are batch version.  
+Status: Reserved
+
+### **96 Bit Versionstamp**
+
+Typecode: 0x33  
+Length: 12 bytes  
+Encoding: Big endian 12-byte integer. First/high 8 bytes are a database version, next two are batch version, next two are ordering within transaction.  
+Status: Reserved
+
+The two versionstamp typecodes are reserved for future work adding compatibility between the tuple layer and versionstamp operations. Note that the first 80 bits of the 96 bit versionstamp are the same as the contents of the 80 bit versionstamp, and they correspond to what the `SET_VERSIONSTAMP_KEY` mutation will write into a database key, i.e., the first 8 bytes are a big-endian, unsigned version corresponding to the commit version of a transaction, and the next two bytes are a big-endian, unsigned batch number ordering transactions that are committed at the same version. The final two bytes of the 96 bit versionstamp are written by the client and should order writes within a single transaction, thereby providing a global order for all versions.
+
+### **User type codes**
+
+Typecode: 0x40 - 0x4f  
+Length: Variable (user defined)  
+Encoding: User defined  
+Status: Reserved
+
+These type codes may be used by third party extenders without coordinating with us. If used in shipping software, the software should use the directory layer and specify a specific layer name when opening its directories to eliminate the possibility of conflicts.
+
+The only way in which future official, otherwise backward-compatible versions of the tuple layer would be expected to use these type codes is to implement some kind of actual extensibility point for this purpose - they will not be used for standard types.
+
+### **Escape Character**
+
+Typecode: 0xff  
+Length: N/A  
+Encoding: N/A  
+Status: Reserved
+
+This type code is not used for anything. However, several of the other tuple types depend on this type code not being used as a type code for other types in order to correctly escape bytes in an order-preserving way. Therefore, it would be a Very Bad Idea™ for future development to start using this code for anything else.
--- a/fdbcli/fdbcli.actor.cpp
+++ b/fdbcli/fdbcli.actor.cpp
@ -437,9 +437,9 @@ void initHelp() {
 		"clear a range of keys from the database",
 		"All keys between BEGINKEY (inclusive) and ENDKEY (exclusive) are cleared from the database. This command will succeed even if the specified range is empty, but may fail because of conflicts." ESCAPINGK);
 	helpMap["configure"] = CommandHelp(
-		"configure [new] <single|double|triple|three_data_hall|three_datacenter|ssd|memory|proxies=<PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*",
+		"configure [new] <single|double|triple|three_data_hall|three_datacenter|multi_dc|ssd|memory|proxies=<PROXIES>|logs=<LOGS>|resolvers=<RESOLVERS>>*",
 		"change database configuration",
-		"The `new' option, if present, initializes a new database with the given configuration rather than changing the configuration of an existing one. When used, both a redundancy mode and a storage engine must be specified.\n\nRedundancy mode:\n  single - one copy of the data.  Not fault tolerant.\n  double - two copies of data (survive one failure).\n  triple - three copies of data (survive two failures).\n  three_data_hall - See the Admin Guide.\n  three_datacenter - See the Admin Guide.\n\nStorage engine:\n  ssd - B-Tree storage engine optimized for solid state disks.\n  memory - Durable in-memory storage engine for small datasets.\n\nproxies=<PROXIES>: Sets the desired number of proxies in the cluster. Must be at least 1, or set to -1 which restores the number of proxies to the default value.\n\nlogs=<LOGS>: Sets the desired number of log servers in the cluster. Must be at least 1, or set to -1 which restores the number of logs to the default value.\n\nresolvers=<RESOLVERS>: Sets the desired number of resolvers in the cluster. Must be at least 1, or set to -1 which restores the number of resolvers to the default value.\n\nSee the FoundationDB Administration Guide for more information.");
+		"The `new' option, if present, initializes a new database with the given configuration rather than changing the configuration of an existing one. When used, both a redundancy mode and a storage engine must be specified.\n\nRedundancy mode:\n  single - one copy of the data.  Not fault tolerant.\n  double - two copies of data (survive one failure).\n  triple - three copies of data (survive two failures).\n  three_data_hall - See the Admin Guide.\n  three_datacenter - See the Admin Guide.\n  multi_dc - See the Admin Guide.\n\nStorage engine:\n  ssd - B-Tree storage engine optimized for solid state disks.\n  memory - Durable in-memory storage engine for small datasets.\n\nproxies=<PROXIES>: Sets the desired number of proxies in the cluster. Must be at least 1, or set to -1 which restores the number of proxies to the default value.\n\nlogs=<LOGS>: Sets the desired number of log servers in the cluster. Must be at least 1, or set to -1 which restores the number of logs to the default value.\n\nresolvers=<RESOLVERS>: Sets the desired number of resolvers in the cluster. Must be at least 1, or set to -1 which restores the number of resolvers to the default value.\n\nSee the FoundationDB Administration Guide for more information.");
 	helpMap["coordinators"] = CommandHelp(
 		"coordinators auto|<ADDRESS>+ [description=new_cluster_description]",
 		"change cluster coordinators or description",
@ -1902,7 +1902,7 @@ void onoff_generator(const char* text, const char *line, std::vector<std::string
 }

 void configure_generator(const char* text, const char *line, std::vector<std::string>& lc) {
-	const char* opts[] = {"new", "single", "double", "triple", "three_data_hall", "three_datacenter", "ssd", "ssd-1", "ssd-2", "memory", "proxies=", "logs=", "resolvers=", NULL};
+	const char* opts[] = {"new", "single", "double", "triple", "three_data_hall", "three_datacenter", "multi_dc", "ssd", "ssd-1", "ssd-2", "memory", "proxies=", "logs=", "resolvers=", NULL};
 	array_generator(text, line, opts, lc);
 }

--- a/fdbclient/ManagementAPI.actor.cpp
+++ b/fdbclient/ManagementAPI.actor.cpp
@ -123,6 +123,15 @@ std::map<std::string, std::string> configForToken( std::string const& mode ) {
 		tLogPolicy = IRepPolicyRef(new PolicyAcross(2, "data_hall",
 			IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne())))
 		));
+	} else if(mode == "multi_dc") {
+		redundancy="6";
+		log_replicas="4";
+		storagePolicy = IRepPolicyRef(new PolicyAcross(3, "dcid",
+			IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne())))
+		));
+		tLogPolicy = IRepPolicyRef(new PolicyAcross(2, "dcid",
+			IRepPolicyRef(new PolicyAcross(2, "zoneid", IRepPolicyRef(new PolicyOne())))
+		));
 	} else
 		redundancySpecified = false;
 	if (redundancySpecified) {
--- a/fdbclient/NativeAPI.actor.cpp
+++ b/fdbclient/NativeAPI.actor.cpp
@ -754,8 +754,8 @@ Reference<Cluster> Cluster::createCluster(std::string connFileName, int apiVersi
 	return Reference<Cluster>(new Cluster( rccf, apiVersion));
 }

-Future<Database> Cluster::createDatabase( Standalone<StringRef> dbName ) {
-	return DatabaseContext::createDatabase( clusterInterface, Reference<Cluster>::addRef( this ), dbName, LocalityData() );
+Future<Database> Cluster::createDatabase( Standalone<StringRef> dbName, LocalityData locality ) {
+	return DatabaseContext::createDatabase( clusterInterface, Reference<Cluster>::addRef( this ), dbName, locality );
 }

 Future<Void> Cluster::onConnected() {
--- a/fdbclient/NativeAPI.h
+++ b/fdbclient/NativeAPI.h
@ -110,7 +110,7 @@ public:
 	static Reference<Cluster> createCluster(std::string connFileName, int apiVersion);

 	// See DatabaseContext::createDatabase
-	Future<Database> createDatabase( Standalone<StringRef> dbName );
+	Future<Database> createDatabase( Standalone<StringRef> dbName, LocalityData locality = LocalityData() );

 	void setOption(FDBClusterOptions::Option option, Optional<StringRef> value);

--- a/fdbrpc/AsyncFileNonDurable.actor.h
+++ b/fdbrpc/AsyncFileNonDurable.actor.h
@ -213,8 +213,9 @@ public:
 			//If we are in the process of deleting a file, we can't let someone else modify it at the same time.  We therefore block the creation of new files until deletion is complete
 			state std::map<std::string, Future<Void>>::iterator deletedFile = filesBeingDeleted.find(filename);
 			if(deletedFile != filesBeingDeleted.end()) {
-				//TraceEvent("AsyncFileNonDurableOpenWaitOnDelete").detail("Filename", filename);
+				//TraceEvent("AsyncFileNonDurableOpenWaitOnDelete1").detail("Filename", filename);
 				Void _ = wait( deletedFile->second || shutdown );
+				//TraceEvent("AsyncFileNonDurableOpenWaitOnDelete2").detail("Filename", filename);
 				if(shutdown.isReady())
 					throw io_error().asInjectedFault();
 			}
@ -711,10 +712,12 @@ private:

 	//Finishes all outstanding actors on an AsyncFileNonDurable and then deletes it
 	ACTOR Future<Void> deleteFile(AsyncFileNonDurable *self) {
-		//We must run on the main thread (instead of a SQLite coroutine).  We don't want to signal any promises from a coroutine, so we switch at the beginning
-		//of this ACTOR
-		Void _ = wait(self->returnToMainThread());
+		state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
+		state int currentTaskID = g_network->getCurrentTask();
+		state std::string filename = self->filename;

+		Void _ = wait( g_simulator.onMachine( currentProcess ) );
+		try {
 			//Make sure all writes have gone through.
 			Promise<bool> startSyncPromise = self->startSyncPromise;
 			self->startSyncPromise = Promise<bool>();
@ -735,11 +738,18 @@ private:

 			//Remove this file from the filesBeingDeleted map so that new files can be created with this filename
 			g_simulator.getMachineByNetworkAddress( self->openedAddress )->closingFiles.erase(self->getFilename());
+			g_simulator.getMachineByNetworkAddress( self->openedAddress )->deletingFiles.erase(self->getFilename());
 			AsyncFileNonDurable::filesBeingDeleted.erase(self->filename);
 			//TraceEvent("AsyncFileNonDurable_FinishDelete", self->id).detail("Filename", self->filename);

 			delete self;
+			Void _ = wait( g_simulator.onProcess( currentProcess, currentTaskID ) );
 			return Void();
+		} catch( Error &e ) {
+			state Error err = e;
+			Void _ = wait( g_simulator.onProcess( currentProcess, currentTaskID ) );
+			throw err;
+		}
 	}
 };

--- a/fdbrpc/FlowTests.actor.cpp
+++ b/fdbrpc/FlowTests.actor.cpp
@ -989,7 +989,7 @@ TEST_CASE("flow/flow/perf/actor patterns")
 			ASSERT(out2[i].isReady());
 		}
 		printf("2xcheeseActor(chooseTwoActor(cheeseActor(fifo), never)): %0.2f M/sec\n", N / 1e6 / (timer() - start));
-		printf("sizeof(CheeseWaitActorActor) == %d\n", sizeof(CheeseWaitActorActor));
+		printf("sizeof(CheeseWaitActorActor) == %lu\n", sizeof(CheeseWaitActorActor));
 	}

 	{
--- a/fdbrpc/LoadBalance.actor.h
+++ b/fdbrpc/LoadBalance.actor.h
@ -204,6 +204,22 @@ Future< REPLY_TYPE(Request) > loadBalance(
 				}
 			}
 		}
+		if( nextMetric > 1e8 ) {
+			for(int i=alternatives->countBest(); i<alternatives->size(); i++) {
+				RequestStream<Request> const* thisStream = &alternatives->get( i, channel );
+				if (!IFailureMonitor::failureMonitor().getState( thisStream->getEndpoint() ).failed) {
+					auto& qd = model->getMeasurement(thisStream->getEndpoint().token.first());
+					double thisMetric = qd.smoothOutstanding.smoothTotal();
+					double thisTime = qd.latency;
+				
+					if( thisMetric < nextMetric ) {
+						nextAlt = i;
+						nextMetric = thisMetric;
+						nextTime = thisTime;
+					}
+				}
+			}
+		}

 		if(nextTime < 1e9) {
 			if(bestTime > FLOW_KNOBS->INSTANT_SECOND_REQUEST_MULTIPLIER*(model->secondMultiplier*(nextTime) + FLOW_KNOBS->BASE_SECOND_REQUEST_TIME)) {
--- a/fdbrpc/Platform.cpp
+++ b/fdbrpc/Platform.cpp
@ -89,15 +89,11 @@ void eraseDirectoryRecursive( std::string const& dir ) {
 	INJECT_FAULT( platform_error, "eraseDirectoryRecursive" );
 #ifdef _WIN32
 	system( ("rd /s /q \"" + dir + "\"").c_str() );
-#elif defined(__linux__)
+#elif defined(__linux__) || defined(__APPLE__)
 	int error =
 		nftw(dir.c_str(),
-			 [](const char *fpath, const struct stat *sb, int typeflag,
-				struct FTW *ftwbuf) -> int {
-				 if (remove(fpath))
-					 return FTW_STOP;
-				 return FTW_CONTINUE;
-			 }, 64, FTW_DEPTH | FTW_PHYS | FTW_ACTIONRETVAL);
+			[](const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftwbuf) -> int { return remove(fpath); }
+			, 64, FTW_DEPTH | FTW_PHYS);
 	/* Looks like calling code expects this to continue silently if
 	   the directory we're deleting doesn't exist in the first
 	   place */
@ -105,14 +101,6 @@ void eraseDirectoryRecursive( std::string const& dir ) {
 		TraceEvent(SevError, "nftw").detail("Directory", dir).GetLastError();
 		throw platform_error();
 	}
-#elif defined(__APPLE__)
-	// const char* argv[2];
-	// argv[0] = dir.c_str();
-	// argv[1] = NULL;
-	// FTS* fts = fts_open(argv, FTS_PHYSICAL | FTS_SEEDOT | FTS_NOSTAT, NULL);
-	// while (FTSENT* ent = fts_read(fts)) {
-	// 	if (ent->fts_info 
-	// }
 #else
 #error Port me!
 #endif
--- a/fdbrpc/dsltest.actor.cpp
+++ b/fdbrpc/dsltest.actor.cpp
@ -79,7 +79,7 @@ bool testFuzzActor( Future<int>(*actor)(FutureStream<int> const&, PromiseStream<
 			}
 		}
 		if (outCount+1 != expectedOutput.size()) {
-			printf("\tERROR: %s output length incorrect: %d != expected %d\n", desc, outCount+1, expectedOutput.size());
+			printf("\tERROR: %s output length incorrect: %d != expected %lu\n", desc, outCount+1, expectedOutput.size());
 			if (trial) printf("\t\tResult was inconsistent between runs!\n");
 			ok = false;
 			//return false;
--- a/fdbrpc/sim2.actor.cpp
+++ b/fdbrpc/sim2.actor.cpp
@ -77,12 +77,12 @@ void ISimulator::displayWorkers() const
 	}

 	printf("DataHall  ZoneId\n");
-	printf("                  Address   Name       Class   Excluded   Failed   Rebooting DataFolder\n");
+	printf("                  Address   Name      Class        Excluded Failed Rebooting Role                                              DataFolder\n");
 	for (auto& zoneRecord : zoneMap) {
 		printf("\n%s\n", zoneRecord.first.c_str());
 		for (auto& processInfo : zoneRecord.second) {
-			printf("                  %9s %-10s %-7s %-8s   %-6s   %-9s %-40s\n",
-			processInfo->address.toString().c_str(), processInfo->name, processInfo->startingClass.toString().c_str(), (processInfo->excluded ? "True" : "False"), (processInfo->failed ? "True" : "False"), (processInfo->rebooting ? "True" : "False"), processInfo->dataFolder);
+			printf("                  %9s %-10s%-13s%-8s %-6s %-9s %-48s %-40s\n",
+			processInfo->address.toString().c_str(), processInfo->name, processInfo->startingClass.toString().c_str(), (processInfo->excluded ? "True" : "False"), (processInfo->failed ? "True" : "False"), (processInfo->rebooting ? "True" : "False"), getRoles(processInfo->address).c_str(), processInfo->dataFolder);
 		}
 	}

@ -886,16 +886,29 @@ public:
 		// This is a _rudimentary_ simulation of the untrustworthiness of non-durable deletes and the possibility of
 		// rebooting during a durable one.  It isn't perfect: for example, on real filesystems testing
 		// for the existence of a non-durably deleted file BEFORE a reboot will show that it apparently doesn't exist.
+		if(g_simulator.getCurrentProcess()->machine->openFiles.count(filename)) {
 			g_simulator.getCurrentProcess()->machine->openFiles.erase(filename);
+			g_simulator.getCurrentProcess()->machine->deletingFiles.insert(filename);
+		}
 		if ( mustBeDurable || g_random->random01() < 0.5 ) {
+			state ISimulator::ProcessInfo* currentProcess = g_simulator.getCurrentProcess();
+			state int currentTaskID = g_network->getCurrentTask();
+			Void _ = wait( g_simulator.onMachine( currentProcess ) );
+			try {
 				Void _ = wait( ::delay(0.05 * g_random->random01()) );
-			if (!self->getCurrentProcess()->rebooting) {
+				if (!currentProcess->rebooting) {
 					auto f = IAsyncFileSystem::filesystem(self->net2)->deleteFile(filename, false);
 					ASSERT( f.isReady() );
 					Void _ = wait( ::delay(0.05 * g_random->random01()) );
 					TEST( true );  // Simulated durable delete
 				}
+				Void _ = wait( g_simulator.onProcess( currentProcess, currentTaskID ) );
 				return Void();
+			} catch( Error &e ) {
+				state Error err = e;
+				Void _ = wait( g_simulator.onProcess( currentProcess, currentTaskID ) );
+				throw err;
+			}
 		} else {
 			TEST( true );  // Simulated non-durable delete
 			return Void();
@ -988,8 +1001,11 @@ public:
 		for (auto processInfo : getAllProcesses()) {
 			// Add non-test processes (i.e. datahall is not set for test processes)
 			if (processInfo->isAvailableClass()) {
+				// Ignore excluded machines
+				if (processInfo->excluded)
+					;
 				// Mark all of the unavailable as dead
-				if (!processInfo->isAvailable())
+				else if (!processInfo->isAvailable())
 					processesDead.push_back(processInfo);
 				else if (protectedAddresses.count(processInfo->address))
 					processesLeft.push_back(processInfo);
@ -1043,22 +1059,22 @@ public:
 			}
 			// Reboot and Delete if remaining machines do NOT fulfill policies
 			else if ((kt != RebootAndDelete) && (kt != RebootProcessAndDelete) && (!processesLeft.validate(tLogPolicy))) {
-				auto newKt = (g_random->random01() < 0.33) ? RebootAndDelete : Reboot;
+				newKt = (g_random->random01() < 0.33) ? RebootAndDelete : Reboot;
 				canSurvive = false;
 				TraceEvent("KillChanged").detail("KillType", kt).detail("NewKillType", newKt).detail("tLogPolicy", tLogPolicy->info()).detail("ProcessesLeft", processesLeft.size()).detail("ProcessesDead", processesDead.size()).detail("RemainingZones", ::describeZones(localitiesLeft)).detail("RemainingDataHalls", ::describeDataHalls(localitiesLeft)).detail("Reason", "tLogPolicy does not validates against remaining processes.");
 			}
 			else if ((kt != RebootAndDelete) && (kt != RebootProcessAndDelete) && (!processesLeft.validate(storagePolicy))) {
-				auto newKt = (g_random->random01() < 0.33) ? RebootAndDelete : Reboot;
+				newKt = (g_random->random01() < 0.33) ? RebootAndDelete : Reboot;
 				canSurvive = false;
 				TraceEvent("KillChanged").detail("KillType", kt).detail("NewKillType", newKt).detail("storagePolicy", storagePolicy->info()).detail("ProcessesLeft", processesLeft.size()).detail("ProcessesDead", processesDead.size()).detail("RemainingZones", ::describeZones(localitiesLeft)).detail("RemainingDataHalls", ::describeDataHalls(localitiesLeft)).detail("Reason", "storagePolicy does not validates against remaining processes.");
 			}
 			else if ((kt != RebootAndDelete) && (kt != RebootProcessAndDelete) && (nQuorum > uniqueMachines.size())) {
-				auto newKt = (g_random->random01() < 0.33) ? RebootAndDelete : Reboot;
+				newKt = (g_random->random01() < 0.33) ? RebootAndDelete : Reboot;
 				canSurvive = false;
 				TraceEvent("KillChanged").detail("KillType", kt).detail("NewKillType", newKt).detail("storagePolicy", storagePolicy->info()).detail("ProcessesLeft", processesLeft.size()).detail("ProcessesDead", processesDead.size()).detail("RemainingZones", ::describeZones(localitiesLeft)).detail("RemainingDataHalls", ::describeDataHalls(localitiesLeft)).detail("Quorum", nQuorum).detail("Machines", uniqueMachines.size()).detail("Reason", "Not enough unique machines to perform auto configuration of coordinators.");
 			}
 			else {
-				TraceEvent("CanSurviveKills").detail("KillType", kt).detail("ProcessesLeft", processesLeft.size()).detail("ProcessesDead", processesDead.size()).detail("DeadZones", ::describeZones(localitiesDead)).detail("DeadDataHalls", ::describeDataHalls(localitiesDead)).detail("tLogPolicy", tLogPolicy->info()).detail("storagePolicy", storagePolicy->info()).detail("Quorum", nQuorum).detail("Machines", uniqueMachines.size()).detail("ZonesLeft", ::describeZones(localitiesLeft)).detail("ValidateRemaining", processesLeft.validate(tLogPolicy));
+				TraceEvent("CanSurviveKills").detail("KillType", kt).detail("ProcessesLeft", processesLeft.size()).detail("ProcessesDead", processesDead.size()).detail("DeadZones", ::describeZones(localitiesDead)).detail("DeadDataHalls", ::describeDataHalls(localitiesDead)).detail("tLogPolicy", tLogPolicy->info()).detail("storagePolicy", storagePolicy->info()).detail("Quorum", nQuorum).detail("Machines", uniqueMachines.size()).detail("ZonesLeft", ::describeZones(localitiesLeft)).detail("DataHallsLeft", ::describeDataHalls(localitiesLeft)).detail("ValidateRemaining", processesLeft.validate(tLogPolicy));
 			}
 		}
 		if (newKillType) *newKillType = newKt;
@ -1082,12 +1098,12 @@ public:
 		TEST( kt == InjectFaults ); // Simulated machine was killed with faults

 		if (kt == KillInstantly) {
-			TraceEvent(SevWarn, "FailMachine").detail("Name", machine->name).detail("Address", machine->address).detailext("ZoneId", machine->locality.zoneId()).detail("Process", describe(*machine)).detail("Rebooting", machine->rebooting).backtrace();
+			TraceEvent(SevWarn, "FailMachine", machine->locality.zoneId()).detail("Name", machine->name).detail("Address", machine->address).detailext("ZoneId", machine->locality.zoneId()).detail("Process", describe(*machine)).detail("Rebooting", machine->rebooting).detail("Protected", protectedAddresses.count(machine->address)).backtrace();
 			// This will remove all the "tracked" messages that came from the machine being killed
 			latestEventCache.clear();
 			machine->failed = true;
 		} else if (kt == InjectFaults) {
-			TraceEvent(SevWarn, "FaultMachine").detail("Name", machine->name).detail("Address", machine->address).detailext("ZoneId", machine->locality.zoneId()).detail("Process", describe(*machine)).detail("Rebooting", machine->rebooting).backtrace();
+			TraceEvent(SevWarn, "FaultMachine", machine->locality.zoneId()).detail("Name", machine->name).detail("Address", machine->address).detailext("ZoneId", machine->locality.zoneId()).detail("Process", describe(*machine)).detail("Rebooting", machine->rebooting).detail("Protected", protectedAddresses.count(machine->address)).backtrace();
 			should_inject_fault = simulator_should_inject_fault;
 			machine->fault_injection_r = g_random->randomUniqueID().first();
 			machine->fault_injection_p1 = 0.1;
@ -1098,8 +1114,10 @@ public:
 		ASSERT(!protectedAddresses.count(machine->address) || machine->rebooting);
 	}
 	virtual void rebootProcess( ProcessInfo* process, KillType kt ) {
-		if( kt == RebootProcessAndDelete && protectedAddresses.count(process->address) )
+		if( kt == RebootProcessAndDelete && protectedAddresses.count(process->address) ) {
+			TraceEvent("RebootChanged").detail("ZoneId", process->locality.describeZone()).detail("KillType", RebootProcess).detail("OrigKillType", kt).detail("Reason", "Protected process");
 			kt = RebootProcess;
+		}
 		doReboot( process, kt );
 	}
 	virtual void rebootProcess(Optional<Standalone<StringRef>> zoneId, bool allProcesses ) {
@ -1144,6 +1162,7 @@ public:
 		TEST(kt == InjectFaults);  // Trying to kill by injecting faults

 		if(speedUpSimulation && !forceKill) {
+			TraceEvent(SevWarn, "AbortedKill", zoneId).detailext("ZoneId", zoneId).detail("Reason", "Unforced kill within speedy simulation.").backtrace();
 			return false;
 		}

@ -1168,15 +1187,25 @@ public:
 		if ((kt == KillInstantly) || (kt == InjectFaults) || (kt == RebootAndDelete) || (kt == RebootProcessAndDelete))
 		{
 			std::vector<ProcessInfo*>	processesLeft, processesDead;
+			int	protectedWorker = 0, unavailable = 0, excluded = 0;

 			for (auto machineRec : machines) {
 				for (auto processInfo : machineRec.second.processes) {
 					// Add non-test processes (i.e. datahall is not set for test processes)
 					if (processInfo->isAvailableClass()) {
-						if (!processInfo->isAvailable())
+						// Treat excluded machines as dead: they are never counted among the processes left
+						if (processInfo->excluded) {
 							processesDead.push_back(processInfo);
-						else if (protectedAddresses.count(processInfo->address))
+							excluded ++;
+						}
+						else if (!processInfo->isAvailable()) {
+							processesDead.push_back(processInfo);
+							unavailable ++;
+						}
+						else if (protectedAddresses.count(processInfo->address)) {
 							processesLeft.push_back(processInfo);
+							protectedWorker ++;
+						}
 						else if (machineRec.second.zoneId != zoneId)
 							processesLeft.push_back(processInfo);
 						// Add processes from dead machines and datacenter machines to dead group
@ -1189,7 +1218,7 @@ public:
 				if ((kt != Reboot) && (!killIsSafe)) {
 					kt = Reboot;
 				}
-				TraceEvent("ChangedKillMachine", zoneId).detailext("ZoneId", zoneId).detail("KillType", kt).detail("OrigKillType", ktOrig).detail("ProcessesLeft", processesLeft.size()).detail("ProcessesDead", processesDead.size()).detail("TotalProcesses", machines.size()).detail("processesPerMachine", processesPerMachine).detail("tLogPolicy", tLogPolicy->info()).detail("storagePolicy", storagePolicy->info());
+				TraceEvent("ChangedKillMachine", zoneId).detailext("ZoneId", zoneId).detail("KillType", kt).detail("OrigKillType", ktOrig).detail("ProcessesLeft", processesLeft.size()).detail("ProcessesDead", processesDead.size()).detail("TotalProcesses", machines.size()).detail("processesPerMachine", processesPerMachine).detail("Protected", protectedWorker).detail("Unavailable", unavailable).detail("Excluded", excluded).detail("ProtectedTotal", protectedAddresses.size()).detail("tLogPolicy", tLogPolicy->info()).detail("storagePolicy", storagePolicy->info());
 			}
 			else if ((kt == KillInstantly) || (kt == InjectFaults)) {
 				TraceEvent("DeadMachine", zoneId).detailext("ZoneId", zoneId).detail("KillType", kt).detail("ProcessesLeft", processesLeft.size()).detail("ProcessesDead", processesDead.size()).detail("TotalProcesses", machines.size()).detail("processesPerMachine", processesPerMachine).detail("tLogPolicy", tLogPolicy->info()).detail("storagePolicy", storagePolicy->info());
@ -1216,31 +1245,30 @@ public:
 		// Check if any processes on machine are rebooting
 		if( processesOnMachine != processesPerMachine && kt >= RebootAndDelete ) {
 			TEST(true); //Attempted reboot, but the target did not have all of its processes running
-			TraceEvent(SevWarn, "AbortedReboot", zoneId).detailext("ZoneId", zoneId).detail("Reason", "The target did not have all of its processes running.").detail("processes", processesOnMachine).detail("processesPerMachine", processesPerMachine).backtrace();
+			TraceEvent(SevWarn, "AbortedKill", zoneId).detail("KillType", kt).detailext("ZoneId", zoneId).detail("Reason", "Machine processes does not match number of processes per machine").detail("processes", processesOnMachine).detail("processesPerMachine", processesPerMachine).backtrace();
 			return false;
 		}

 		// Check if any processes on machine are rebooting
 		if ( processesOnMachine != processesPerMachine) {
 			TEST(true); //Attempted reboot, but the target did not have all of its processes running
-			TraceEvent(SevWarn, "AbortedKill", zoneId).detailext("ZoneId", zoneId).detail("Reason", "The target did not have all of its processes running.").detail("processes", processesOnMachine).detail("processesPerMachine", processesPerMachine).backtrace();
+			TraceEvent(SevWarn, "AbortedKill", zoneId).detail("KillType", kt).detailext("ZoneId", zoneId).detail("Reason", "Machine processes does not match number of processes per machine").detail("processes", processesOnMachine).detail("processesPerMachine", processesPerMachine).backtrace();
 			return false;
 		}

-
 		TraceEvent("KillMachine", zoneId).detailext("ZoneId", zoneId).detail("Kt", kt).detail("KtOrig", ktOrig).detail("KilledMachines", killedMachines).detail("KillableMachines", processesOnMachine).detail("ProcessPerMachine", processesPerMachine).detail("KillChanged", kt!=ktOrig).detail("killIsSafe", killIsSafe);
 		if (kt < RebootAndDelete ) {
 			if(kt == InjectFaults && machines[zoneId].machineProcess != nullptr)
 				killProcess_internal( machines[zoneId].machineProcess, kt );
 			for (auto& process : machines[zoneId].processes) {
-				TraceEvent("KillMachineProcess", zoneId).detail("KillType", kt).detail("Process", process->toString()).detail("startingClass", process->startingClass.toString());
+				TraceEvent("KillMachineProcess", zoneId).detail("KillType", kt).detail("Process", process->toString()).detail("startingClass", process->startingClass.toString()).detail("failed", process->failed).detail("excluded", process->excluded).detail("rebooting", process->rebooting);
 				if (process->startingClass != ProcessClass::TesterClass)
 					killProcess_internal( process, kt );
 			}
 		}
 		else if ( kt == Reboot || killIsSafe) {
 			for (auto& process : machines[zoneId].processes) {
-				TraceEvent("KillMachineProcess", zoneId).detail("KillType", kt).detail("Process", process->toString()).detail("startingClass", process->startingClass.toString());
+				TraceEvent("KillMachineProcess", zoneId).detail("KillType", kt).detail("Process", process->toString()).detail("startingClass", process->startingClass.toString()).detail("failed", process->failed).detail("excluded", process->excluded).detail("rebooting", process->rebooting);
 				if (process->startingClass != ProcessClass::TesterClass)
 					doReboot(process, kt );
 			}
@ -1256,13 +1284,16 @@ public:
 		int	dcProcesses = 0;

 		// Switch to a reboot, if anything protected on machine
-		for (auto& process : processes) {
-			auto processDcId = process->locality.dcId();
-			auto processZoneId = process->locality.zoneId();
+		for (auto& procRecord : processes) {
+			auto processDcId = procRecord->locality.dcId();
+			auto processZoneId = procRecord->locality.zoneId();
 			ASSERT(processZoneId.present());
 			if (processDcId.present() && (processDcId == dcId)) {
-				if (protectedAddresses.count(process->address))
+				if ((kt != Reboot) && (protectedAddresses.count(procRecord->address))) {
 					kt = Reboot;
+					TraceEvent(SevWarn, "DcKillChanged").detailext("DataCenter", dcId).detail("KillType", kt).detail("OrigKillType", ktOrig)
+						.detail("Reason", "Datacenter has protected process").detail("ProcessAddress", procRecord->address).detail("failed", procRecord->failed).detail("rebooting", procRecord->rebooting).detail("excluded", procRecord->excluded).detail("Process", describe(*procRecord));
+				}
 				datacenterZones[processZoneId.get()] ++;
 				dcProcesses ++;
 			}
@ -1277,7 +1308,9 @@ public:
 					// Add non-test processes (i.e. datahall is not set for test processes)
 					if (processInfo->isAvailableClass()) {
 						// Mark all of the unavailable as dead
-						if (!processInfo->isAvailable())
+						if (processInfo->excluded)
+							processesDead.push_back(processInfo);
+						else if (!processInfo->isAvailable())
 							processesDead.push_back(processInfo);
 						else if (protectedAddresses.count(processInfo->address))
 							processesLeft.push_back(processInfo);
@ -1291,7 +1324,7 @@ public:
 			}

 			if (!canKillProcesses(processesLeft, processesDead, kt, &kt)) {
-				TraceEvent(SevWarn, "DcKillChanged").detailext("DataCenter", dcId).detail("KillType", ktOrig).detail("NewKillType", kt);
+				TraceEvent(SevWarn, "DcKillChanged").detailext("DataCenter", dcId).detail("KillType", kt).detail("OrigKillType", ktOrig);
 			}
 			else {
 				TraceEvent("DeadDataCenter").detailext("DataCenter", dcId).detail("KillType", kt).detail("DcZones", datacenterZones.size()).detail("DcProcesses", dcProcesses).detail("ProcessesDead", processesDead.size()).detail("ProcessesLeft", processesLeft.size()).detail("tLogPolicy", storagePolicy->info()).detail("storagePolicy", storagePolicy->info());
@ -1306,10 +1339,13 @@ public:
 			.detail("DcZones", datacenterZones.size())
 			.detail("DcProcesses", dcProcesses)
 			.detailext("DCID", dcId)
-			.detail("KillType", kt);
+			.detail("KillType", kt)
+			.detail("OrigKillType", ktOrig);

 		for (auto& datacenterZone : datacenterZones)
 		killMachine( datacenterZone.first, kt, (kt == RebootAndDelete), true);
+// TODO(ahm): If the killMachine() call above doesn't work, fall back to the more conservative call below
+//	killMachine( datacenterZone.first, kt, false, true);
 	}
 	virtual void clogInterface( uint32_t ip, double seconds, ClogMode mode = ClogDefault ) {
 		if (mode == ClogDefault) {
@ -1487,6 +1523,9 @@ static double networkLatency() {
 }

 ACTOR void doReboot( ISimulator::ProcessInfo *p, ISimulator::KillType kt ) {
+	TraceEvent("RebootingProcessAttempt").detailext("ZoneId", p->locality.zoneId()).detail("KillType", kt).detail("Process", p->toString()).detail("startingClass", p->startingClass.toString()).detail("failed", p->failed).detail("excluded", p->excluded).detail("rebooting", p->rebooting).detail("TaskDefaultDelay", TaskDefaultDelay);
+//	ASSERT(p->failed); //ahm
+
 	Void _ = wait( g_sim2.delay( 0, TaskDefaultDelay, p ) ); // Switch to the machine in question

 	try {
@ -1499,7 +1538,7 @@ ACTOR void doReboot( ISimulator::ProcessInfo *p, ISimulator::KillType kt ) {

 		if( p->rebooting )
 			return;
-		TraceEvent("RebootingMachine").detail("KillType", kt).detail("Address", p->address).detailext("ZoneId", p->locality.zoneId()).detailext("DataHall", p->locality.dataHallId()).detail("Locality", p->locality.toString());
+		TraceEvent("RebootingProcess").detail("KillType", kt).detail("Address", p->address).detailext("ZoneId", p->locality.zoneId()).detailext("DataHall", p->locality.dataHallId()).detail("Locality", p->locality.toString()).detail("failed", p->failed).detail("excluded", p->excluded).backtrace();
 		p->rebooting = true;
 		p->shutdownSignal.send( kt );
 	} catch (Error& e) {
--- a/fdbrpc/simulator.h
+++ b/fdbrpc/simulator.h
@ -108,6 +108,7 @@ public:
 		ProcessInfo* machineProcess;
 		std::vector<ProcessInfo*> processes;
 		std::map<std::string, Future<Reference<IAsyncFile>>> openFiles;
+		std::set<std::string> deletingFiles;
 		std::set<std::string> closingFiles;
 		Optional<Standalone<StringRef>>	zoneId;

@ -150,17 +151,81 @@ public:
 	virtual bool isAvailable() const = 0;
 	virtual void displayWorkers() const;

-	virtual void excludeAddress(NetworkAddress const& address) {
-		excludedAddresses.insert(address);
+	virtual void addRole(NetworkAddress const& address, std::string const& role) {
+		roleAddresses[address][role] ++;
+		TraceEvent("RoleAdd").detail("Address", address).detail("Role", role).detail("Roles", roleAddresses[address].size()).detail("Value", roleAddresses[address][role]);
 	}
+
+	virtual void removeRole(NetworkAddress const& address, std::string const& role) {
+		auto addressIt = roleAddresses.find(address);
+		if (addressIt != roleAddresses.end()) {
+			auto rolesIt = addressIt->second.find(role);
+			if (rolesIt != addressIt->second.end()) {
+				if (rolesIt->second > 1) {
+					rolesIt->second --;
+					TraceEvent("RoleRemove").detail("Address", address).detail("Role", role).detail("Roles", addressIt->second.size()).detail("Value", rolesIt->second).detail("Result", "Decremented Role");
+				}
+				else {
+					addressIt->second.erase(rolesIt);
+					if (addressIt->second.size()) {
+						TraceEvent("RoleRemove").detail("Address", address).detail("Role", role).detail("Roles", addressIt->second.size()).detail("Value", 0).detail("Result", "Removed Role");
+					}
+					else {
+						roleAddresses.erase(addressIt);
+						TraceEvent("RoleRemove").detail("Address", address).detail("Role", role).detail("Roles", 0).detail("Value", 0).detail("Result", "Removed Address");
+					}
+				}
+			}
+			else {
+				TraceEvent(SevWarn,"RoleRemove").detail("Address", address).detail("Role", role).detail("Result", "Role Missing");
+			}
+		}
+		else {
+			TraceEvent(SevWarn,"RoleRemove").detail("Address", address).detail("Role", role).detail("Result", "Address Missing");
+		}
+	}
+
+	virtual std::string getRoles(NetworkAddress const& address, bool skipWorkers = true) const {
+		auto addressIt = roleAddresses.find(address);
+		std::string roleText;
+		if (addressIt != roleAddresses.end()) {
+			for (auto& roleIt : addressIt->second) {
+				if ((!skipWorkers) || (roleIt.first != "Worker"))
+					roleText += roleIt.first + ((roleIt.second > 1) ? format("-%d ", roleIt.second) : " ");
+			}
+		}
+		if (roleText.empty())
+				roleText = "[unset]";
+		return roleText;
+	}
+
+	virtual void excludeAddress(NetworkAddress const& address) {
+		excludedAddresses[address]++;
+		TraceEvent("ExcludeAddress").detail("Address", address).detail("Value", excludedAddresses[address]);
+	}
+
 	virtual void includeAddress(NetworkAddress const& address) {
-		excludedAddresses.erase(address);
+		auto addressIt = excludedAddresses.find(address);
+		if (addressIt != excludedAddresses.end()) {
+			if (addressIt->second > 1) {
+				addressIt->second --;
+				TraceEvent("IncludeAddress").detail("Address", address).detail("Value", addressIt->second).detail("Result", "Decremented");
+			}
+			else {
+				excludedAddresses.erase(addressIt);
+				TraceEvent("IncludeAddress").detail("Address", address).detail("Value", 0).detail("Result", "Removed");
+			}
+		}
+		else {
+			TraceEvent(SevWarn,"IncludeAddress").detail("Address", address).detail("Result", "Missing");
+		}
 	}
 	virtual void includeAllAddresses() {
+		TraceEvent("IncludeAddressAll").detail("AddressTotal", excludedAddresses.size());
 		excludedAddresses.clear();
 	}
 	virtual bool isExcluded(NetworkAddress const& address) const {
-		return excludedAddresses.count(address) == 0;
+		return excludedAddresses.find(address) != excludedAddresses.end();
 	}

 	virtual void disableSwapToMachine(Optional<Standalone<StringRef>> zoneId ) {
@ -229,7 +294,8 @@ protected:

 private:
 	std::set<Optional<Standalone<StringRef>>> swapsDisabled;
-	std::set<NetworkAddress> excludedAddresses;
+	std::map<NetworkAddress, int> excludedAddresses;
+	std::map<NetworkAddress, std::map<std::string, int>> roleAddresses;
 	bool allSwapsDisabled;
 };

--- a/fdbserver/ClusterController.actor.cpp
+++ b/fdbserver/ClusterController.actor.cpp
@ -218,6 +218,7 @@ std::vector<std::pair<WorkerInterface, ProcessClass>> getWorkersForTlogsAcrossDa
 		std::vector<LocalityData>							unavailableLocals;
 		LocalitySetRef																					logServerSet;
 		LocalityMap<std::pair<WorkerInterface, ProcessClass>>*	logServerMap;
+		UID 		functionId = g_nondeterministic_random->randomUniqueID();
 		bool		bCompleted = false;

 		logServerSet = Reference<LocalitySet>(new LocalityMap<std::pair<WorkerInterface, ProcessClass>>());
@ -230,7 +231,7 @@ std::vector<std::pair<WorkerInterface, ProcessClass>> getWorkersForTlogsAcrossDa
 			}
 			else {
 				if (it.second.interf.locality.dataHallId().present())
-					TraceEvent(SevWarn,"GWFTADNotAvailable", id)
+					TraceEvent(SevWarn,"GWFTADNotAvailable", functionId)
 						.detail("Fitness", fitness)
 						.detailext("Zone", it.second.interf.locality.zoneId())
 						.detailext("DataHall", it.second.interf.locality.dataHallId())
@ -243,7 +244,8 @@ std::vector<std::pair<WorkerInterface, ProcessClass>> getWorkersForTlogsAcrossDa
 						.detail("Locality", it.second.interf.locality.toString())
 						.detail("tLogReplicationFactor", conf.tLogReplicationFactor)
 						.detail("tLogPolicy", conf.tLogPolicy ? conf.tLogPolicy->info() : "[unset]")
-						.detail("DesiredLogs", conf.getDesiredLogs());
+						.detail("DesiredLogs", conf.getDesiredLogs())
+						.detail("InterfaceId", id);
 				unavailableLocals.push_back(it.second.interf.locality);
 			}
 		}
@ -258,12 +260,13 @@ std::vector<std::pair<WorkerInterface, ProcessClass>> getWorkersForTlogsAcrossDa
 				logServerMap->add(worker.first.locality, &worker);
 			}
 			if (logServerSet->size() < conf.tLogReplicationFactor) {
-				TraceEvent(SevWarn,"GWFTADTooFew", id)
+				TraceEvent(SevWarn,"GWFTADTooFew", functionId)
 					.detail("Fitness", fitness)
 					.detail("Processes", logServerSet->size())
 					.detail("tLogReplicationFactor", conf.tLogReplicationFactor)
 					.detail("tLogPolicy", conf.tLogPolicy ? conf.tLogPolicy->info() : "[unset]")
-					.detail("DesiredLogs", conf.getDesiredLogs());
+					.detail("DesiredLogs", conf.getDesiredLogs())
+					.detail("InterfaceId", id);
 			}
 			else if (logServerSet->size() <= conf.getDesiredLogs()) {
 				ASSERT(conf.tLogPolicy);
@ -275,12 +278,13 @@ std::vector<std::pair<WorkerInterface, ProcessClass>> getWorkersForTlogsAcrossDa
 					break;
 				}
 				else {
-					TraceEvent(SevWarn,"GWFTADNotAcceptable", id)
+					TraceEvent(SevWarn,"GWFTADNotAcceptable", functionId)
 						.detail("Fitness", fitness)
 						.detail("Processes", logServerSet->size())
 						.detail("tLogReplicationFactor", conf.tLogReplicationFactor)
 						.detail("tLogPolicy", conf.tLogPolicy ? conf.tLogPolicy->info() : "[unset]")
-						.detail("DesiredLogs", conf.getDesiredLogs());
+						.detail("DesiredLogs", conf.getDesiredLogs())
+						.detail("InterfaceId", id);
 				}
 			}
 			// Try to select the desired size, if larger
@ -300,7 +304,7 @@ std::vector<std::pair<WorkerInterface, ProcessClass>> getWorkersForTlogsAcrossDa
 						results.push_back(*object);
 						tLocalities.push_back(object->first.locality);
 					}
-					TraceEvent("GWFTADBestResults", id)
+					TraceEvent("GWFTADBestResults", functionId)
 						.detail("Fitness", fitness)
 						.detail("Processes", logServerSet->size())
 						.detail("BestCount", bestSet.size())
@ -308,17 +312,19 @@ std::vector<std::pair<WorkerInterface, ProcessClass>> getWorkersForTlogsAcrossDa
 						.detail("BestDataHalls", ::describeDataHalls(tLocalities))
 						.detail("tLogPolicy", conf.tLogPolicy ? conf.tLogPolicy->info() : "[unset]")
 						.detail("TotalResults", results.size())
-						.detail("DesiredLogs", conf.getDesiredLogs());
+						.detail("DesiredLogs", conf.getDesiredLogs())
+						.detail("InterfaceId", id);
 					bCompleted = true;
 					break;
 				}
 				else {
-					TraceEvent(SevWarn,"GWFTADNoBest", id)
+					TraceEvent(SevWarn,"GWFTADNoBest", functionId)
 						.detail("Fitness", fitness)
 						.detail("Processes", logServerSet->size())
 						.detail("tLogReplicationFactor", conf.tLogReplicationFactor)
 						.detail("tLogPolicy", conf.tLogPolicy ? conf.tLogPolicy->info() : "[unset]")
-						.detail("DesiredLogs", conf.getDesiredLogs());
+						.detail("DesiredLogs", conf.getDesiredLogs())
+						.detail("InterfaceId", id);
 				}
 			}
 		}
@ -331,7 +337,7 @@ std::vector<std::pair<WorkerInterface, ProcessClass>> getWorkersForTlogsAcrossDa
 					tLocalities.push_back(object->first.locality);
 				}

-				TraceEvent(SevWarn, "GetTLogTeamFailed")
+				TraceEvent(SevWarn, "GetTLogTeamFailed", functionId)
 					.detail("Policy", conf.tLogPolicy->info())
 					.detail("Processes", logServerSet->size())
 					.detail("Workers", id_worker.size())
@ -344,7 +350,8 @@ std::vector<std::pair<WorkerInterface, ProcessClass>> getWorkersForTlogsAcrossDa
 					.detail("DesiredLogs", conf.getDesiredLogs())
 					.detail("RatingTests",SERVER_KNOBS->POLICY_RATING_TESTS)
 					.detail("checkStable", checkStable)
-					.detail("PolicyGenerations",SERVER_KNOBS->POLICY_GENERATIONS).backtrace();
+					.detail("PolicyGenerations",SERVER_KNOBS->POLICY_GENERATIONS)
+					.detail("InterfaceId", id).backtrace();

 			// Free the set
 			logServerSet->clear();
@ -356,14 +363,25 @@ std::vector<std::pair<WorkerInterface, ProcessClass>> getWorkersForTlogsAcrossDa
 			id_used[result.first.locality.processId()]++;
 		}

-		TraceEvent("GetTLogTeamDone")
+		TraceEvent("GetTLogTeamDone", functionId)
 			.detail("Completed", bCompleted).detail("Policy", conf.tLogPolicy->info())
 			.detail("Results", results.size()).detail("Processes", logServerSet->size())
 			.detail("Workers", id_worker.size())
 			.detail("Replication", conf.tLogReplicationFactor)
 			.detail("Desired", conf.getDesiredLogs())
 			.detail("RatingTests",SERVER_KNOBS->POLICY_RATING_TESTS)
-			.detail("PolicyGenerations",SERVER_KNOBS->POLICY_GENERATIONS);
+			.detail("PolicyGenerations",SERVER_KNOBS->POLICY_GENERATIONS)
+			.detail("InterfaceId", id);
+
+		for (auto& result : results) {
+			TraceEvent("GetTLogTeamWorker", functionId)
+				.detail("Class", result.second.toString())
+				.detail("Address", result.first.address())
+				.detailext("Zone", result.first.locality.zoneId())
+				.detailext("DataHall", result.first.locality.dataHallId())
+				.detail("isExcludedServer", conf.isExcludedServer(result.first.address()))
+				.detail("isAvailable", IFailureMonitor::failureMonitor().getState(result.first.storage.getEndpoint()).isAvailable());
+		}

 		// Free the set
 		logServerSet->clear();
--- a/fdbserver/DiskQueue.actor.cpp
+++ b/fdbserver/DiskQueue.actor.cpp
@ -419,8 +419,8 @@ public:
 		}

 		if( error.code() != error_code_actor_cancelled ) {
-			if (!self->stopped.isSet()) self->stopped.send(Void());
-			if (!self->error.isSet()) self->error.send(Never());
+			if (self->stopped.canBeSet()) self->stopped.send(Void());
+			if (self->error.canBeSet()) self->error.send(Never());
 			delete self;
 		}
 	}
--- a/fdbserver/SimulatedCluster.actor.cpp
+++ b/fdbserver/SimulatedCluster.actor.cpp
@ -204,8 +204,8 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(
 	loop {
 		auto waitTime = SERVER_KNOBS->MIN_REBOOT_TIME + (SERVER_KNOBS->MAX_REBOOT_TIME - SERVER_KNOBS->MIN_REBOOT_TIME) * g_random->random01();
 		cycles ++;
-		TraceEvent("SimulatedFDBDWait").detail("Cycles", cycles).detail("RandomId", randomId)
-			.detail("ProcessAddress", NetworkAddress(ip, port, true, false))
+		TraceEvent("SimulatedFDBDPreWait").detail("Cycles", cycles).detail("RandomId", randomId)
+			.detail("Address", NetworkAddress(ip, port, true, false))
 			.detailext("ZoneId", localities.zoneId())
 			.detail("waitTime", waitTime).detail("Port", port);

@ -219,10 +219,10 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(
 			TraceEvent("SimulatedRebooterStarting", localities.zoneId()).detail("Cycles", cycles).detail("RandomId", randomId)
 				.detailext("ZoneId", localities.zoneId())
 				.detailext("DataHall", localities.dataHallId())
-				.detail("ProcessAddress", process->address.toString())
-				.detail("ProcessExcluded", process->excluded)
+				.detail("Address", process->address.toString())
+				.detail("Excluded", process->excluded)
 				.detail("UsingSSL", useSSL);
-			TraceEvent("ProgramStart").detail("Cycles", cycles)
+			TraceEvent("ProgramStart").detail("Cycles", cycles).detail("RandomId", randomId)
 				.detail("SourceVersion", getHGVersion())
 				.detail("Version", FDB_VT_VERSION)
 				.detail("PackageName", FDB_VT_PACKAGE_NAME)
@ -248,7 +248,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(
 			} catch (Error& e) {
 				// If in simulation, if we make it here with an error other than io_timeout but enASIOTimedOut is set then somewhere an io_timeout was converted to a different error.
 				if(g_network->isSimulated() && e.code() != error_code_io_timeout && (bool)g_network->global(INetwork::enASIOTimedOut))
-					TraceEvent(SevError, "IOTimeoutErrorSuppressed").detail("ErrorCode", e.code()).backtrace();
+					TraceEvent(SevError, "IOTimeoutErrorSuppressed").detail("ErrorCode", e.code()).detail("RandomId", randomId).backtrace();

 				if (onShutdown.isReady() && onShutdown.isError()) throw onShutdown.getError();
 				if(e.code() != error_code_actor_cancelled)
@ -258,15 +258,15 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(
 			}

 			TraceEvent("SimulatedFDBDDone", localities.zoneId()).detail("Cycles", cycles).detail("RandomId", randomId)
-				.detail("ProcessAddress", process->address)
-				.detail("ProcessExcluded", process->excluded)
+				.detail("Address", process->address)
+				.detail("Excluded", process->excluded)
 				.detailext("ZoneId", localities.zoneId())
 				.detail("KillType", onShutdown.isReady() ? onShutdown.get() : ISimulator::None);

 			if (!onShutdown.isReady())
 				onShutdown = ISimulator::InjectFaults;
 		} catch (Error& e) {
-			TraceEvent(destructed ? SevInfo : SevError, "SimulatedFDBDRebooterError", localities.zoneId()).error(e, true);
+			TraceEvent(destructed ? SevInfo : SevError, "SimulatedFDBDRebooterError", localities.zoneId()).detail("RandomId", randomId).error(e, true);
 			onShutdown = e;
 		}

@ -276,6 +276,11 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(
 			process->rebooting = true;
 			process->shutdownSignal.send(ISimulator::None);
 		}
+		TraceEvent("SimulatedFDBDWait", localities.zoneId()).detail("Cycles", cycles).detail("RandomId", randomId)
+			.detail("Address", process->address)
+			.detail("Excluded", process->excluded)
+			.detail("Rebooting", process->rebooting)
+			.detailext("ZoneId", localities.zoneId());
 		Void _ = wait( g_simulator.onProcess( simProcess ) );

 		Void _ = wait(delay(0.00001 + FLOW_KNOBS->MAX_BUGGIFIED_DELAY));  // One last chance for the process to clean up?
@ -284,15 +289,15 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(

 		auto shutdownResult = onShutdown.get();
 		TraceEvent("SimulatedFDBDShutdown", localities.zoneId()).detail("Cycles", cycles).detail("RandomId", randomId)
-			.detail("ProcessAddress", process->address)
-			.detail("ProcessExcluded", process->excluded)
+			.detail("Address", process->address)
+			.detail("Excluded", process->excluded)
 			.detailext("ZoneId", localities.zoneId())
 			.detail("KillType", shutdownResult);

 		if( shutdownResult < ISimulator::RebootProcessAndDelete ) {
 			TraceEvent("SimulatedFDBDLowerReboot", localities.zoneId()).detail("Cycles", cycles).detail("RandomId", randomId)
-				.detail("ProcessAddress", process->address)
-				.detail("ProcessExcluded", process->excluded)
+				.detail("Address", process->address)
+				.detail("Excluded", process->excluded)
 				.detailext("ZoneId", localities.zoneId())
 				.detail("KillType", shutdownResult);
 			return onShutdown.get();
@ -300,7 +305,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(

 		if( onShutdown.get() == ISimulator::RebootProcessAndDelete ) {
 			TraceEvent("SimulatedFDBDRebootAndDelete", localities.zoneId()).detail("Cycles", cycles).detail("RandomId", randomId)
-				.detail("ProcessAddress", process->address)
+				.detail("Address", process->address)
 				.detailext("ZoneId", localities.zoneId())
 				.detail("KillType", shutdownResult);
 			*coordFolder = joinPath(baseFolder, g_random->randomUniqueID().toString());
@ -317,7 +322,7 @@ ACTOR Future<ISimulator::KillType> simulatedFDBDRebooter(
 		}
 		else {
 			TraceEvent("SimulatedFDBDJustRepeat", localities.zoneId()).detail("Cycles", cycles).detail("RandomId", randomId)
-				.detail("ProcessAddress", process->address)
+				.detail("Address", process->address)
 				.detailext("ZoneId", localities.zoneId())
 				.detail("KillType", shutdownResult);
 		}
@ -351,6 +356,7 @@ ACTOR Future<Void> simulatedMachine(
 	state int bootCount = 0;
 	state std::vector<std::string> myFolders;
 	state std::vector<std::string> coordFolders;
+	state UID randomId = g_nondeterministic_random->randomUniqueID();

 	try {
 		CSimpleIni ini;
@ -387,6 +393,7 @@ ACTOR Future<Void> simulatedMachine(
 				std::string path = joinPath(myFolders[i], "fdb.cluster");
 				Reference<ClusterConnectionFile> clusterFile(useSeedFile ? new ClusterConnectionFile(path, connStr.toString()) : new ClusterConnectionFile(path));
 				processes.push_back(simulatedFDBDRebooter(clusterFile, ips[i], sslEnabled, i + 1, localities, processClass, &myFolders[i], &coordFolders[i], baseFolder, connStr, useSeedFile, runBackupAgents));
+				TraceEvent("SimulatedMachineProcess", randomId).detail("Address", NetworkAddress(ips[i], i+1, true, false)).detailext("ZoneId", localities.zoneId()).detailext("DataHall", localities.dataHallId()).detail("Folder", myFolders[i]);
 			}

 			TEST( bootCount >= 1 ); // Simulated machine rebooted
@ -394,7 +401,7 @@ ACTOR Future<Void> simulatedMachine(
 			TEST( bootCount >= 3 ); // Simulated machine rebooted three times
 			++bootCount;

-			TraceEvent("SimulatedMachineStart")
+			TraceEvent("SimulatedMachineStart", randomId)
 				.detail("Folder0", myFolders[0])
 				.detail("CFolder0", coordFolders[0])
 				.detail("MachineIPs", toIPVectorString(ips))
@ -410,7 +417,7 @@ ACTOR Future<Void> simulatedMachine(

 			Void _ = wait( waitForAll( processes ) );

-			TraceEvent("SimulatedMachineRebootStart")
+			TraceEvent("SimulatedMachineRebootStart", randomId)
 				.detail("Folder0", myFolders[0])
 				.detail("CFolder0", coordFolders[0])
 				.detail("MachineIPs", toIPVectorString(ips))
@ -442,7 +449,12 @@ ACTOR Future<Void> simulatedMachine(
 				ASSERT( it.second.isReady() && !it.second.isError() );
 			}

-			TraceEvent("SimulatedMachineRebootAfterKills")
+			for( auto it : g_simulator.getMachineById(localities.zoneId())->deletingFiles ) {
+				filenames.insert( it );
+				closingStr += it + ", ";
+			}
+
+			TraceEvent("SimulatedMachineRebootAfterKills", randomId)
 				.detail("Folder0", myFolders[0])
 				.detail("CFolder0", coordFolders[0])
 				.detail("MachineIPs", toIPVectorString(ips))
@ -471,12 +483,12 @@ ACTOR Future<Void> simulatedMachine(
 						openFiles += *it + ", ";
 						i++;
 					}
-					TraceEvent("MachineFilesOpen").detail("PAddr", toIPVectorString(ips)).detail("OpenFiles", openFiles);
+					TraceEvent("MachineFilesOpen", randomId).detail("PAddr", toIPVectorString(ips)).detail("OpenFiles", openFiles);
 				} else
 					break;

 				if( shutdownDelayCount++ >= 50 ) {  // Worker doesn't shut down instantly on reboot
-					TraceEvent(SevError, "SimulatedFDBDFilesCheck")
+					TraceEvent(SevError, "SimulatedFDBDFilesCheck", randomId)
 						.detail("PAddrs", toIPVectorString(ips))
 						.detailext("ZoneId", localities.zoneId())
 						.detailext("DataHall", localities.dataHallId());
@ -487,8 +499,8 @@ ACTOR Future<Void> simulatedMachine(
 				backoff = std::min( backoff + 1.0, 6.0 );
 			}

-			TraceEvent("SimulatedFDBDFilesClosed")
-				.detail("ProcessAddress", toIPVectorString(ips))
+			TraceEvent("SimulatedFDBDFilesClosed", randomId)
+				.detail("Address", toIPVectorString(ips))
 				.detailext("ZoneId", localities.zoneId())
 				.detailext("DataHall", localities.dataHallId());

@ -510,7 +522,7 @@ ACTOR Future<Void> simulatedMachine(

 			auto rebootTime = g_random->random01() * MACHINE_REBOOT_TIME;

-			TraceEvent("SimulatedMachineShutdown")
+			TraceEvent("SimulatedMachineShutdown", randomId)
 				.detail("Swap", swap)
 				.detail("KillType", killType)
 				.detail("RebootTime", rebootTime)
@ -530,7 +542,7 @@ ACTOR Future<Void> simulatedMachine(

 				if( myFolders != toRebootFrom ) {
 					TEST( true ); // Simulated machine swapped data folders
-					TraceEvent("SimulatedMachineFolderSwap")
+					TraceEvent("SimulatedMachineFolderSwap", randomId)
 						.detail("OldFolder0", myFolders[0]).detail("NewFolder0", toRebootFrom[0])
 						.detail("MachineIPs", toIPVectorString(ips));
 				}
--- a/fdbserver/Status.actor.cpp
+++ b/fdbserver/Status.actor.cpp
@ -947,6 +947,7 @@ ACTOR static Future<double> doGrvProbe(Transaction *tr, Optional<FDBTransactionO

 	loop {
 		try {
+			tr->setOption(FDBTransactionOptions::LOCK_AWARE);
 			if(priority.present()) {
 				tr->setOption(priority.get());
 			}
@ -969,6 +970,7 @@ ACTOR static Future<double> doReadProbe(Future<double> grvProbe, Transaction *tr
 	state double start = timer_monotonic();

 	loop {
+		tr->setOption(FDBTransactionOptions::LOCK_AWARE);
 		try {
 			Optional<Standalone<StringRef> > _ = wait(tr->get(LiteralStringRef("\xff/StatusJsonTestKey62793")));
 			return timer_monotonic() - start;
@ -993,6 +995,7 @@ ACTOR static Future<double> doCommitProbe(Future<double> grvProbe, Transaction *

 	loop {
 		try {
+			tr->setOption(FDBTransactionOptions::LOCK_AWARE);
 			tr->setOption(FDBTransactionOptions::PRIORITY_SYSTEM_IMMEDIATE);
 			tr->makeSelfConflicting();
 			Void _ = wait(tr->commit());
@ -1022,9 +1025,7 @@ ACTOR static Future<Void> doProbe(Future<double> probe, int timeoutSeconds, cons
 	return Void();
 }

-ACTOR static Future<StatusObject> latencyProbeFetcher(Reference<AsyncVar<struct ServerDBInfo>> db, StatusArray *messages, std::set<std::string> *incomplete_reasons) {
-	Database cx = openDBOnServer(db, TaskDefaultEndpoint, true, true); // Open a new database connection that is lock-aware
-
+ACTOR static Future<StatusObject> latencyProbeFetcher(Database cx, StatusArray *messages, std::set<std::string> *incomplete_reasons) {
 	state Transaction trImmediate(cx);
 	state Transaction trDefault(cx);
 	state Transaction trBatch(cx);
@ -1777,9 +1778,7 @@ ACTOR Future<StatusReply> clusterGetStatus(

 		if (configuration.present()){
 			// Do the latency probe by itself to avoid interference from other status activities
-			state Future<StatusObject> latencyProbe = latencyProbeFetcher(db, &messages, &status_incomplete_reasons);
-
-			StatusObject latencyProbeResults = wait(latencyProbe);
+			StatusObject latencyProbeResults = wait(latencyProbeFetcher(cx, &messages, &status_incomplete_reasons));

 			statusObj["database_available"] = latencyProbeResults.count("immediate_priority_transaction_start_seconds") && latencyProbeResults.count("read_seconds") && latencyProbeResults.count("commit_seconds");
 			if (!latencyProbeResults.empty()) {
--- a/fdbserver/TLogServer.actor.cpp
+++ b/fdbserver/TLogServer.actor.cpp
@ -1757,7 +1757,7 @@ ACTOR Future<Void> tLog( IKeyValueStore* persistentData, IDiskQueue* persistentQ
 			}
 		}
 	} catch (Error& e) {
-		TraceEvent("TLogError", tlogId).error(e);
+		TraceEvent("TLogError", tlogId).error(e, true);
 		while(!tlogRequests.isEmpty()) {
 			tlogRequests.getFuture().pop().reply.sendError(recruitment_failed());
 		}
--- a/fdbserver/TesterInterface.h
+++ b/fdbserver/TesterInterface.h
@ -83,11 +83,11 @@ struct TesterInterface {
 	}
 };

-Future<Void> testerServerCore( TesterInterface const& interf, Reference<ClusterConnectionFile> const& ccf, Reference<AsyncVar<struct ServerDBInfo>> const& );
+Future<Void> testerServerCore( TesterInterface const& interf, Reference<ClusterConnectionFile> const& ccf, Reference<AsyncVar<struct ServerDBInfo>> const&, LocalityData const& );

 enum test_location_t { TEST_HERE, TEST_ON_SERVERS, TEST_ON_TESTERS };
 enum test_type_t { TEST_TYPE_FROM_FILE, TEST_TYPE_CONSISTENCY_CHECK };

-Future<Void> runTests( Reference<ClusterConnectionFile> const& connFile, test_type_t const& whatToRun, test_location_t const& whereToRun, int const& minTestersExpected, std::string const& fileName = std::string(), StringRef const& startingConfiguration = StringRef() );
+Future<Void> runTests( Reference<ClusterConnectionFile> const& connFile, test_type_t const& whatToRun, test_location_t const& whereToRun, int const& minTestersExpected, std::string const& fileName = std::string(), StringRef const& startingConfiguration = StringRef(), LocalityData const& locality = LocalityData() );

 #endif
--- a/fdbserver/fdbserver.actor.cpp
+++ b/fdbserver/fdbserver.actor.cpp
@ -1528,6 +1528,17 @@ int main(int argc, char* argv[]) {

 		Future<Optional<Void>> f;

+		Standalone<StringRef> machineId(getSharedMemoryMachineId().toString());
+
+		if (!localities.isPresent(LocalityData::keyZoneId))
+			localities.set(LocalityData::keyZoneId, zoneId.present() ? zoneId : machineId);
+
+		if (!localities.isPresent(LocalityData::keyMachineId))
+			localities.set(LocalityData::keyMachineId, machineId);
+
+		if (!localities.isPresent(LocalityData::keyDcId) && dcId.present())
+			localities.set(LocalityData::keyDcId, dcId);
+
 		if (role == Simulation) {
 			TraceEvent("Simulation").detail("TestFile", testFile);

@ -1574,16 +1585,6 @@ int main(int argc, char* argv[]) {

 			vector<Future<Void>> actors;
 			actors.push_back( listenError );
-			Standalone<StringRef> machineId(getSharedMemoryMachineId().toString());
-
-			if (!localities.isPresent(LocalityData::keyZoneId))
-				localities.set(LocalityData::keyZoneId, zoneId.present() ? zoneId : machineId);
-
-			if (!localities.isPresent(LocalityData::keyMachineId))
-				localities.set(LocalityData::keyMachineId, machineId);
-
-			if (!localities.isPresent(LocalityData::keyDcId) && dcId.present())
-				localities.set(LocalityData::keyDcId, dcId);

 			actors.push_back( fdbd(connectionFile, localities, processClass, dataFolder, dataFolder, storageMemLimit, metricsConnFile, metricsPrefix) );
 			//actors.push_back( recurring( []{}, .001 ) );  // for ASIO latency measurement
@ -1591,11 +1592,11 @@ int main(int argc, char* argv[]) {
 			f = stopAfter( waitForAll(actors) );
 			g_network->run();
 		} else if (role == MultiTester) {
-			f = stopAfter( runTests( connectionFile, TEST_TYPE_FROM_FILE, testOnServers ? TEST_ON_SERVERS : TEST_ON_TESTERS, minTesterCount, testFile ) );
+			f = stopAfter( runTests( connectionFile, TEST_TYPE_FROM_FILE, testOnServers ? TEST_ON_SERVERS : TEST_ON_TESTERS, minTesterCount, testFile, StringRef(), localities ) );
 			g_network->run();
 		} else if (role == Test || role == ConsistencyCheck) {
 			auto m = startSystemMonitor(dataFolder, zoneId, zoneId);
-			f = stopAfter( runTests( connectionFile, role == ConsistencyCheck ? TEST_TYPE_CONSISTENCY_CHECK : TEST_TYPE_FROM_FILE, TEST_HERE, 1, testFile ) );
+			f = stopAfter( runTests( connectionFile, role == ConsistencyCheck ? TEST_TYPE_CONSISTENCY_CHECK : TEST_TYPE_FROM_FILE, TEST_HERE, 1, testFile, StringRef(), localities ) );
 			g_network->run();
 		} else if (role == CreateTemplateDatabase) {
 			createTemplateDatabase();
--- a/fdbserver/tester.actor.cpp
+++ b/fdbserver/tester.actor.cpp
@ -484,7 +484,7 @@ ACTOR Future<Void> runWorkloadAsync( Database cx, WorkloadInterface workIface, T
 	return Void();
 }

-ACTOR Future<Void> testerServerWorkload( WorkloadRequest work, Reference<ClusterConnectionFile> ccf, Reference<AsyncVar<struct ServerDBInfo>> dbInfo ) {
+ACTOR Future<Void> testerServerWorkload( WorkloadRequest work, Reference<ClusterConnectionFile> ccf, Reference<AsyncVar<struct ServerDBInfo>> dbInfo, LocalityData locality ) {
 	state WorkloadInterface workIface;
 	state bool replied = false;
 	state Database cx;
@ -501,7 +501,7 @@ ACTOR Future<Void> testerServerWorkload( WorkloadRequest work, Reference<Cluster

 		if( database.size() ) {
 			Reference<Cluster> cluster = Cluster::createCluster(ccf->getFilename(), -1);
-			Database _cx = wait(cluster->createDatabase(database));
+			Database _cx = wait(cluster->createDatabase(database, locality));
 			cx = _cx;

 			Void _ = wait( delay(1.0) );
@ -544,7 +544,7 @@ ACTOR Future<Void> testerServerWorkload( WorkloadRequest work, Reference<Cluster
 	return Void();
 }

-ACTOR Future<Void> testerServerCore( TesterInterface interf, Reference<ClusterConnectionFile> ccf, Reference<AsyncVar<struct ServerDBInfo>> dbInfo ) {
+ACTOR Future<Void> testerServerCore( TesterInterface interf, Reference<ClusterConnectionFile> ccf, Reference<AsyncVar<struct ServerDBInfo>> dbInfo, LocalityData locality ) {
 	state PromiseStream<Future<Void>> addWorkload;
 	state Future<Void> workerFatalError = actorCollection(addWorkload.getFuture());

@ -552,7 +552,7 @@ ACTOR Future<Void> testerServerCore( TesterInterface interf, Reference<ClusterCo
 	loop choose {
 		when (Void _ = wait(workerFatalError)) {}
 		when (WorkloadRequest work = waitNext( interf.recruitments.getFuture() )) {
-			addWorkload.send(testerServerWorkload(work, ccf, dbInfo));
+			addWorkload.send(testerServerWorkload(work, ccf, dbInfo, locality));
 		}
 	}
 }
@ -985,7 +985,7 @@ vector<TestSpec> readTests( ifstream& ifs ) {
 	return result;
 }

-ACTOR Future<Void> runTests( Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>>> cc, Reference<AsyncVar<Optional<struct ClusterInterface>>> ci, vector< TesterInterface > testers, vector<TestSpec> tests, StringRef startingConfiguration ) {
+ACTOR Future<Void> runTests( Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>>> cc, Reference<AsyncVar<Optional<struct ClusterInterface>>> ci, vector< TesterInterface > testers, vector<TestSpec> tests, StringRef startingConfiguration, LocalityData locality ) {
 	state Standalone<StringRef> database = LiteralStringRef("DB");
 	state Database cx;
 	state Reference<AsyncVar<ServerDBInfo>> dbInfo( new AsyncVar<ServerDBInfo> );
@ -1016,7 +1016,7 @@ ACTOR Future<Void> runTests( Reference<AsyncVar<Optional<struct ClusterControlle
 		databasePingDelay = 0.0;
 	
 	if (useDB) {
-		Database _cx = wait( DatabaseContext::createDatabase( ci, Reference<Cluster>(), database, LocalityData() ) ); // FIXME: Locality!
+		Database _cx = wait( DatabaseContext::createDatabase( ci, Reference<Cluster>(), database, locality ) );
 		cx = _cx;
 	} else
 		database = LiteralStringRef("");
@ -1071,7 +1071,7 @@ ACTOR Future<Void> runTests( Reference<AsyncVar<Optional<struct ClusterControlle

 ACTOR Future<Void> runTests( Reference<AsyncVar<Optional<struct ClusterControllerFullInterface>>> cc, 
 		Reference<AsyncVar<Optional<struct ClusterInterface>>> ci, vector<TestSpec> tests, test_location_t at, 
-		int minTestersExpected, StringRef startingConfiguration ) {
+		int minTestersExpected, StringRef startingConfiguration, LocalityData locality ) {
 	state int flags = at == TEST_ON_SERVERS ? 0 : GetWorkersRequest::FLAG_TESTER_CLASS;
 	state Future<Void> testerTimeout = delay(60.0); // wait 60 sec for testers to show up
 	state vector<std::pair<WorkerInterface, ProcessClass>> workers;
@ -1097,12 +1097,12 @@ ACTOR Future<Void> runTests( Reference<AsyncVar<Optional<struct ClusterControlle
 	for(int i=0; i<workers.size(); i++)
 		ts.push_back(workers[i].first.testerInterface);

-	Void _ = wait( runTests( cc, ci, ts, tests, startingConfiguration) );
+	Void _ = wait( runTests( cc, ci, ts, tests, startingConfiguration, locality) );
 	return Void();
 }

 ACTOR Future<Void> runTests( Reference<ClusterConnectionFile> connFile, test_type_t whatToRun, test_location_t at, 
-		int minTestersExpected, std::string fileName, StringRef startingConfiguration ) {
+		int minTestersExpected, std::string fileName, StringRef startingConfiguration, LocalityData locality ) {
 	state vector<TestSpec> testSpecs;
 	Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> cc( new AsyncVar<Optional<ClusterControllerFullInterface>> );
 	Reference<AsyncVar<Optional<ClusterInterface>>> ci( new AsyncVar<Optional<ClusterInterface>> );
@ -1147,10 +1147,10 @@ ACTOR Future<Void> runTests( Reference<ClusterConnectionFile> connFile, test_typ
 		Reference<AsyncVar<ServerDBInfo>> db( new AsyncVar<ServerDBInfo> );
 		vector<TesterInterface> iTesters(1);
 		actors.push_back( reportErrors(monitorServerDBInfo( cc, Reference<ClusterConnectionFile>(), LocalityData(), db ), "monitorServerDBInfo") );  // FIXME: Locality
-		actors.push_back( reportErrors(testerServerCore( iTesters[0], connFile, db ), "testerServerCore") );
-		tests = runTests( cc, ci, iTesters, testSpecs, startingConfiguration );
+		actors.push_back( reportErrors(testerServerCore( iTesters[0], connFile, db, locality ), "testerServerCore") );
+		tests = runTests( cc, ci, iTesters, testSpecs, startingConfiguration, locality );
 	} else {
-		tests = reportErrors(runTests(cc, ci, testSpecs, at, minTestersExpected, startingConfiguration), "runTests");
+		tests = reportErrors(runTests(cc, ci, testSpecs, at, minTestersExpected, startingConfiguration, locality), "runTests");
 	}

 	choose {
--- a/fdbserver/worker.actor.cpp
+++ b/fdbserver/worker.actor.cpp
@ -356,6 +356,7 @@ void startRole(UID roleId, UID workerId, std::string as, std::map<std::string, s
 	g_roles.insert({as, roleId.shortString()});
 	StringMetricHandle(LiteralStringRef("Roles")) = roleString(g_roles, false);
 	StringMetricHandle(LiteralStringRef("RolesWithIDs")) = roleString(g_roles, true);
+	if (g_network->isSimulated()) g_simulator.addRole(g_network->getLocalAddress(), as);
 }

 void endRole(UID id, std::string as, std::string reason, bool ok, Error e) {
@ -387,6 +388,7 @@ void endRole(UID id, std::string as, std::string reason, bool ok, Error e) {
 	g_roles.erase({as, id.shortString()});
 	StringMetricHandle(LiteralStringRef("Roles")) = roleString(g_roles, false);
 	StringMetricHandle(LiteralStringRef("RolesWithIDs")) = roleString(g_roles, true);
+	if (g_network->isSimulated()) g_simulator.removeRole(g_network->getLocalAddress(), as);
 }

 ACTOR Future<Void> monitorServerDBInfo( Reference<AsyncVar<Optional<ClusterControllerFullInterface>>> ccInterface, Reference<ClusterConnectionFile> connFile, LocalityData locality, Reference<AsyncVar<ServerDBInfo>> dbInfo ) {
@ -510,7 +512,7 @@ ACTOR Future<Void> workerServer( Reference<ClusterConnectionFile> connFile, Refe
 		if( metricsConnFile.size() > 0) {
 			try {
 				state Reference<Cluster> cluster = Cluster::createCluster( metricsConnFile, Cluster::API_VERSION_LATEST );
-				metricsLogger = runMetrics( cluster->createDatabase(LiteralStringRef("DB")), KeyRef(metricsPrefix) );
+				metricsLogger = runMetrics( cluster->createDatabase(LiteralStringRef("DB"), locality), KeyRef(metricsPrefix) );
 			} catch(Error &e) {
 				TraceEvent(SevWarnAlways, "TDMetricsBadClusterFile").error(e).detail("ConnFile", metricsConnFile);
 			}
@ -524,7 +526,7 @@ ACTOR Future<Void> workerServer( Reference<ClusterConnectionFile> connFile, Refe
 	errorForwarders.add( registrationClient( ccInterface, interf, processClass ) );
 	errorForwarders.add( waitFailureServer( interf.waitFailure.getFuture() ) );
 	errorForwarders.add( monitorServerDBInfo( ccInterface, connFile, locality, dbInfo ) );
-	errorForwarders.add( testerServerCore( interf.testerInterface, connFile, dbInfo ) );
+	errorForwarders.add( testerServerCore( interf.testerInterface, connFile, dbInfo, locality ) );

 	filesClosed.add(stopping.getFuture());

--- a/fdbserver/workloads/ApiCorrectness.actor.cpp
+++ b/fdbserver/workloads/ApiCorrectness.actor.cpp
@ -132,11 +132,12 @@ public:
 	}

 	ACTOR Future<Void> performSetup(Database cx, ApiCorrectnessWorkload *self) {
-		//Choose a random transaction type (NativeAPI, ReadYourWrites, ThreadSafe)
+		//Choose a random transaction type (NativeAPI, ReadYourWrites, ThreadSafe, MultiVersion)
 		std::vector<TransactionType> types;
 		types.push_back(NATIVE);
 		types.push_back(READ_YOUR_WRITES);
 		types.push_back(THREAD_SAFE);
+		types.push_back(MULTI_VERSION);

 		Void _ = wait(self->chooseTransactionFactory(cx, types));

--- a/fdbserver/workloads/ConsistencyCheck.actor.cpp
+++ b/fdbserver/workloads/ConsistencyCheck.actor.cpp
@ -1099,6 +1099,7 @@ struct ConsistencyCheckWorkload : TestWorkload
 				if(!statefulProcesses[itr->first.address()].count(id)) {
 					TraceEvent("ConsistencyCheck_ExtraDataStore").detail("Address", itr->first.address()).detail("DataStoreID", id);
 					if(g_network->isSimulated()) {
+						TraceEvent("ConsistencyCheck_RebootProcess").detail("Address", itr->first.address()).detail("DataStoreID", id);
 						g_simulator.rebootProcess(g_simulator.getProcessByAddress(itr->first.address()), ISimulator::RebootProcess);
 					}

--- a/fdbserver/workloads/DummyWorkload.actor.cpp
+++ b/fdbserver/workloads/DummyWorkload.actor.cpp
@ -24,10 +24,12 @@
 // The workload that do nothing. It can be used for waiting for quiescence
 struct DummyWorkload : TestWorkload {
 	bool displayWorkers;
+	double displayDelay;

 	DummyWorkload(WorkloadContext const& wcx)
 	: TestWorkload(wcx) {
 		displayWorkers = getOption(options, LiteralStringRef("displayWorkers"), true);
+		displayDelay = getOption(options, LiteralStringRef("displayDelay"), 0.0);
 	}

 	virtual std::string description() {
@ -35,7 +37,15 @@ struct DummyWorkload : TestWorkload {
 	}

 	virtual Future<Void> start(Database const& cx) {
-		if ((clientId == 0) && (displayWorkers))
+		if ((clientId == 0) && (displayWorkers)) {
+			return _start(this, cx);
+		}
+		return Void();
+	}
+
+	ACTOR static Future<Void> _start( DummyWorkload* self, Database cx) {
+		if (self->displayDelay > 0.0)
+			Void _ = wait(delay(self->displayDelay));
 		g_simulator.displayWorkers();
 		return Void();
 	}
--- a/fdbserver/workloads/MachineAttrition.actor.cpp
+++ b/fdbserver/workloads/MachineAttrition.actor.cpp
@ -155,6 +155,7 @@ struct MachineAttritionWorkload : TestWorkload {
 				LocalityData targetMachine = self->machines.back();

 				TraceEvent("Assassination").detail("TargetMachine", targetMachine.toString())
+					.detailext("zoneId", targetMachine.zoneId())
 					.detail("Reboot", self->reboot).detail("killedMachines", killedMachines)
 					.detail("machinesToKill", self->machinesToKill).detail("machinesToLeave", self->machinesToLeave)
 					.detail("machines", self->machines.size()).detail("Replace", self->replacement);
@ -166,8 +167,9 @@ struct MachineAttritionWorkload : TestWorkload {
 						g_simulator.killMachine( targetMachine.zoneId(), ISimulator::Reboot );
 					}
 				} else {
-					TraceEvent("WorkerKill").detail("MachineCount", self->machines.size());
-					if( g_random->random01() < 0.33 ) {
+					auto randomDouble = g_random->random01();
+					TraceEvent("WorkerKill").detail("MachineCount", self->machines.size()).detail("RandomValue", randomDouble);
+					if (randomDouble < 0.33 ) {
 						TraceEvent("RebootAndDelete").detail("TargetMachine", targetMachine.toString());
 						g_simulator.killMachine( targetMachine.zoneId(), ISimulator::RebootAndDelete );
 					} else {
--- a/fdbserver/workloads/RemoveServersSafely.actor.cpp
+++ b/fdbserver/workloads/RemoveServersSafely.actor.cpp
@ -26,9 +26,6 @@
 #include "fdbrpc/simulator.h"
 #include "fdbclient/ManagementAPI.h"

-const char*		removeClearEnv = getenv("REMOVE_CLEAR");
-int						removeClear = removeClearEnv ? atoi(removeClearEnv) : 1;
-
 template <>
 std::string describe( uint32_t const& item ) {
 	return format("%d", item);
@ -154,6 +151,7 @@ struct RemoveServersSafelyWorkload : TestWorkload {
 	{
 		std::vector<ISimulator::ProcessInfo*>	processes;
 		std::set<AddressExclusion>	processAddrs;
+		UID functionId = g_nondeterministic_random->randomUniqueID();

 		// Get the list of process network addresses
 		for (auto& netAddr : netAddrs) {
@ -170,24 +168,64 @@ struct RemoveServersSafelyWorkload : TestWorkload {
 		// Get the list of processes matching network address
 		for (auto processInfo : g_simulator.getAllProcesses()) {
 			auto processNet = AddressExclusion(processInfo->address.ip, processInfo->address.port);
-			if (processAddrs.find(processNet) != processAddrs.end())
+			if (processAddrs.find(processNet) != processAddrs.end()) {
 				processes.push_back(processInfo);
+				TraceEvent("RemoveAndKill", functionId).detail("Step", "getProcessItem").detail("ProcessAddress", processInfo->address).detail("Process", describe(*processInfo)).detail("failed", processInfo->failed).detail("excluded", processInfo->excluded).detail("rebooting", processInfo->rebooting).detail("Protected", g_simulator.protectedAddresses.count(processInfo->address));
 			}
-		TraceEvent("RemoveAndKill").detail("Step", "getProcesses")
+			else {
+				TraceEvent("RemoveAndKill", functionId).detail("Step", "getProcessNoItem").detail("ProcessAddress", processInfo->address).detail("Process", describe(*processInfo)).detail("failed", processInfo->failed).detail("excluded", processInfo->excluded).detail("rebooting", processInfo->rebooting).detail("Protected", g_simulator.protectedAddresses.count(processInfo->address));
+			}
+		}
+		TraceEvent("RemoveAndKill", functionId).detail("Step", "getProcesses")
+			.detail("netAddrSize",netAddrs.size()).detail("processAddrSize",processAddrs.size())
 			.detail("netAddrs",describe(netAddrs)).detail("processAddrs",describe(processAddrs))
 			.detail("Proceses", processes.size()).detail("MachineProcesses", machineProcesses.size());

-		// Processes may have been destroyed causing
-//		ASSERT(processAddrs.size() == processes.size());
 		return processes;
 	}

+	virtual std::vector<ISimulator::ProcessInfo*> excludeAddresses(std::set<AddressExclusion> const& procAddrs)
+	{
+		// Re-resolve the processes currently matching procAddrs; the set may have changed due to reboots, deletes, etc
+		std::vector<ISimulator::ProcessInfo*>	procArray = getProcesses(procAddrs);
+
+		// Exclude every requested address in the simulator, then flag each matching process record as excluded; the matched processes are returned
+		TraceEvent("RemoveAndKill").detail("Step", "exclude addresses").detail("AddrTotal", procAddrs.size()).detail("ProcTotal", procArray.size()).detail("Addresses", describe(procAddrs)).detail("ClusterAvailable", g_simulator.isAvailable());
+		for (auto& procAddr : procAddrs) {
+			g_simulator.excludeAddress(NetworkAddress(procAddr.ip, procAddr.port, true, false));
+		}
+		for (auto& procRecord : procArray) {
+			procRecord->excluded = true;
+			TraceEvent("RemoveAndKill").detail("Step", "ExcludeAddress").detail("ProcessAddress", procRecord->address).detail("Process", describe(*procRecord)).detail("failed", procRecord->failed).detail("rebooting", procRecord->rebooting).detail("ClusterAvailable", g_simulator.isAvailable());
+		}
+		return procArray;
+	}
+
+	virtual std::vector<ISimulator::ProcessInfo*> includeAddresses(std::set<AddressExclusion> const& procAddrs)
+	{
+		// Re-resolve the processes currently matching procAddrs; the set may have changed due to reboots, deletes, etc
+		std::vector<ISimulator::ProcessInfo*>	procArray = getProcesses(procAddrs);
+
+		// Include all of the given addresses again in the simulator (original rationale: the first command of the next section is includeall)
+		TraceEvent("RemoveAndKill").detail("Step", "include addresses").detail("AddrTotal", procAddrs.size()).detail("ProcTotal", procArray.size()).detail("Addresses", describe(procAddrs)).detail("ClusterAvailable", g_simulator.isAvailable());
+		for (auto& procAddr : procAddrs) {
+			g_simulator.includeAddress(NetworkAddress(procAddr.ip, procAddr.port, true, false));
+		}
+		for (auto& procRecord : procArray) {
+			// Clear the excluded flag only on processes that have not failed; per the original note, a failed process requires a reboot to revive it
+			if (!procRecord->failed)
+				procRecord->excluded = false;
+			TraceEvent("RemoveAndKill").detail("Step", "IncludeAddress").detail("ProcessAddress", procRecord->address).detail("Process", describe(*procRecord)).detail("failed", procRecord->failed).detail("rebooting", procRecord->rebooting).detail("ClusterAvailable", g_simulator.isAvailable());
+		}
+		return procArray;
+	}
+
 	virtual std::vector<ISimulator::ProcessInfo*> protectServers(std::set<AddressExclusion> const& killAddrs)
 	{
 		std::vector<ISimulator::ProcessInfo*>	processes;
 		std::set<AddressExclusion>	processAddrs;
 		std::vector<AddressExclusion> killableAddrs;
-		std::vector<ISimulator::ProcessInfo*>	killProcesses, killableProcesses, processesLeft, processesDead;
+		std::vector<ISimulator::ProcessInfo*>	killProcArray, killableProcesses, processesLeft, processesDead;

 		// Get the list of processes matching network address
 		for (auto processInfo : getServers()) {
@ -199,7 +237,7 @@ struct RemoveServersSafelyWorkload : TestWorkload {
 			else if (killAddrs.find(processNet) == killAddrs.end())
 				processesLeft.push_back(processInfo);
 			else
-				killProcesses.push_back(processInfo);
+				killProcArray.push_back(processInfo);
 		}

 		// Identify the largest set of processes which can be killed
@ -207,22 +245,22 @@ struct RemoveServersSafelyWorkload : TestWorkload {
 		bool bCanKillProcess;
 		ISimulator::ProcessInfo*	randomProcess;
 		auto deadProcess = processesDead.back();
-		for (int killsLeft = killProcesses.size(); killsLeft > 0; killsLeft --)
+		for (int killsLeft = killProcArray.size(); killsLeft > 0; killsLeft --)
 		{
 			// Select a random kill process
 			randomIndex = g_random->randomInt(0, killsLeft);
-			randomProcess = killProcesses[randomIndex];
+			randomProcess = killProcArray[randomIndex];
 			processesDead.push_back(randomProcess);
-			killProcesses[randomIndex] = killProcesses.back();
-			killProcesses.pop_back();
+			killProcArray[randomIndex] = killProcArray.back();
+			killProcArray.pop_back();
 			// Add all of the remaining processes the leftover array
-			processesLeft.insert(processesLeft.end(), killProcesses.begin(), killProcesses.end());
+			processesLeft.insert(processesLeft.end(), killProcArray.begin(), killProcArray.end());

 			// Check if we can kill the added process
 			bCanKillProcess = g_simulator.canKillProcesses(processesLeft, processesDead, ISimulator::KillInstantly, NULL);

 			// Remove the added processes
-			processesLeft.resize(processesLeft.size() - killProcesses.size());
+			processesLeft.resize(processesLeft.size() - killProcArray.size());

 			if (bCanKillProcess) {
 				killableProcesses.push_back(randomProcess);
@ -247,94 +285,133 @@ struct RemoveServersSafelyWorkload : TestWorkload {

 		// Removing the first set of machines might legitimately bring the database down, so a timeout is not an error
 		state std::vector<NetworkAddress> firstCoordinators;
-		state std::vector<ISimulator::ProcessInfo*>	killProcesses;
+		state std::vector<ISimulator::ProcessInfo*>	killProcArray;
+		state bool bClearedFirst;

-		TraceEvent("RemoveAndKill").detail("Step", "exclude first list").detail("toKill1", describe(toKill1)).detail("KillTotal", toKill1.size())
-			.detail("ClusterAvailable", g_simulator.isAvailable());
+		TraceEvent("RemoveAndKill").detail("Step", "exclude list first").detail("toKill", describe(toKill1)).detail("KillTotal", toKill1.size()).detail("ClusterAvailable", g_simulator.isAvailable());
+		self->excludeAddresses(toKill1);

-			killProcesses = self->getProcesses(toKill1);
-			TraceEvent("RemoveAndKill").detail("Step", "mark first processes excluded").detail("Addresses", describe(toKill1))
-				.detail("AddressTotal", toKill1.size()).detail("Processes", killProcesses.size())
-				.detail("ClusterAvailable", g_simulator.isAvailable());
-			for (auto& killProcess : killProcesses) {
-				killProcess->excluded = true;
-				g_simulator.excludeAddress(killProcess->address);
-				TraceEvent("RemoveAndKill").detail("Step", "MarkProcessFirst").detail("Process", describe(*killProcess));
+		Optional<Void> result = wait( timeout( removeAndKill( self, cx, toKill1, NULL), self->kill1Timeout ) );
+
+		bClearedFirst = result.present();
+
+		TraceEvent("RemoveAndKill").detail("Step", "excluded list first").detail("excluderesult", bClearedFirst ? "succeeded" : "failed").detail("KillTotal", toKill1.size()).detail("Processes", killProcArray.size()).detail("toKill1", describe(toKill1)).detail("ClusterAvailable", g_simulator.isAvailable());
+
+		bClearedFirst=false;
+		// Include the servers, if unable to exclude
+		if (!bClearedFirst) {
+			// Get the updated list of processes which may have changed due to reboots, deletes, etc
+			TraceEvent("RemoveAndKill").detail("Step", "include all first").detail("KillTotal", toKill1.size()).detail("toKill", describe(toKill1)).detail("ClusterAvailable", g_simulator.isAvailable());
+			Void _ = wait( includeServers( cx, vector<AddressExclusion>(1) ) );
+			self->includeAddresses(toKill1);
+			TraceEvent("RemoveAndKill").detail("Step", "included all first").detail("KillTotal", toKill1.size()).detail("toKill", describe(toKill1)).detail("ClusterAvailable", g_simulator.isAvailable());
 		}

-		Optional<Void> result = wait( timeout( removeAndKill( self, cx, toKill1), self->kill1Timeout ) );
-
-		TraceEvent("RemoveAndKill").detail("Step", "first exclusion result").detail("result", result.present() ? "succeeded" : "failed");
-		killProcesses = self->getProcesses(toKill1);
-		TraceEvent("RemoveAndKill").detail("Step", "include first processes").detail("toKill1", describe(toKill1))
-			.detail("KillTotal", toKill1.size()).detail("Processes", killProcesses.size());
-		for (auto& killProcess : killProcesses) {
-			g_simulator.includeAddress(killProcess->address);
-			killProcess->excluded = false;
-		}
-
-		killProcesses = self->protectServers(toKill2);
+		// Get the list of protected servers
+		killProcArray = self->protectServers(toKill2);

 		// Update the kill networks to the killable processes
-		toKill2 = self->getNetworks(killProcesses);
+		toKill2 = self->getNetworks(killProcArray);

-		TraceEvent("RemoveAndKill").detail("Step", "Mark second processes excluded").detail("toKill2", describe(toKill2))
-			.detail("KillTotal", toKill2.size()).detail("Processes", killProcesses.size());
-		for (auto& killProcess : killProcesses) {
-			killProcess->excluded = true;
-			g_simulator.excludeAddress(killProcess->address);
-			TraceEvent("RemoveAndKill").detail("Step", "MarkProcessSecond").detail("Processes", killProcesses.size()).detail("Process", describe(*killProcess));
-		}
+		TraceEvent("RemoveAndKill").detail("Step", "exclude list second").detail("KillTotal", toKill2.size()).detail("toKill", describe(toKill2)).detail("ClusterAvailable", g_simulator.isAvailable());
+		self->excludeAddresses(toKill2);

 		// The second set of machines is selected so that we can always make progress without it, even after the permitted number of other permanent failures
 		// so we expect to succeed after a finite amount of time
 		state Future<Void> disabler = disableConnectionFailuresAfter( self->kill2Timeout/2, "RemoveServersSafely" );
 		TraceEvent("RemoveAndKill").detail("Step", "exclude second list").detail("toKill2", describe(toKill2)).detail("KillTotal", toKill2.size())
-			.detail("Processes", killProcesses.size()).detail("ClusterAvailable", g_simulator.isAvailable());
-		Void _ = wait( reportErrors( timeoutError( removeAndKill( self, cx, toKill2), self->kill2Timeout ), "RemoveServersSafelyError", UID() ) );
+			.detail("Processes", killProcArray.size()).detail("ClusterAvailable", g_simulator.isAvailable());
+		Void _ = wait( reportErrors( timeoutError( removeAndKill( self, cx, toKill2, bClearedFirst ? &toKill1 : NULL), self->kill2Timeout ), "RemoveServersSafelyError", UID() ) );

-
-		TraceEvent("RemoveAndKill").detail("Step", "excluded second list").detail("KillTotal", toKill2.size()).detail("Excluded", killProcesses.size())
-			.detail("ClusterAvailable", g_simulator.isAvailable());
+		TraceEvent("RemoveAndKill").detail("Step", "excluded second list").detail("KillTotal", toKill1.size()).detail("toKill", describe(toKill2)).detail("ClusterAvailable", g_simulator.isAvailable());

 		// Reinclude all of the machine, if buggified
 		if (BUGGIFY) {
-			TraceEvent("RemoveAndKill").detail("Step", "final include all").detail("ClusterAvailable", g_simulator.isAvailable());
+			// Get the updated list of processes which may have changed due to reboots, deletes, etc
+			TraceEvent("RemoveAndKill").detail("Step", "include all second").detail("KillTotal", toKill1.size()).detail("toKill", describe(toKill2)).detail("ClusterAvailable", g_simulator.isAvailable());
 			Void _ = wait( includeServers( cx, vector<AddressExclusion>(1) ) );
-			for (auto& killProcess : killProcesses) {
-				g_simulator.includeAddress(killProcess->address);
-				killProcess->excluded = false;
-			}
-			TraceEvent("RemoveAndKill").detail("Step", "final included all").detail("ClusterAvailable", g_simulator.isAvailable());
+			self->includeAddresses(toKill2);
+			TraceEvent("RemoveAndKill").detail("Step", "included all second").detail("KillTotal", toKill1.size()).detail("toKill", describe(toKill2)).detail("ClusterAvailable", g_simulator.isAvailable());
 		}

 		return Void();
 	}

-	ACTOR static Future<Void> removeAndKill( RemoveServersSafelyWorkload* self, Database cx, std::set<AddressExclusion> toKill)
+	virtual std::vector<ISimulator::ProcessInfo*> killAddresses(std::set<AddressExclusion> const& killAddrs)
 	{
-		// First clear the exclusion list and exclude the given list
-		TraceEvent("RemoveAndKill").detail("Step", "include all").detail("ClusterAvailable", g_simulator.isAvailable());
-		Void _ = wait( includeServers( cx, vector<AddressExclusion>(1) ) );
-		TraceEvent("RemoveAndKill").detail("Step", "included all").detail("ClusterAvailable", g_simulator.isAvailable());
+		UID functionId = g_nondeterministic_random->randomUniqueID();
+		bool removeViaClear = !BUGGIFY;
+		std::vector<ISimulator::ProcessInfo*>	killProcArray;
+		std::vector<AddressExclusion>	toKillArray;

-		state std::vector<ISimulator::ProcessInfo*>	killProcesses;
+		std::copy(killAddrs.begin(), killAddrs.end(), std::back_inserter(toKillArray));
+		killProcArray = getProcesses(killAddrs);
+
+		// Reboot and delete or kill the servers
+		if( killProcesses ) {
+			TraceEvent("RemoveAndKill", functionId).detail("Step", removeViaClear ? "ClearProcesses" : "IgnoreProcesses").detail("Addresses", describe(killAddrs))
+				.detail("Processes", killProcArray.size()).detail("ClusterAvailable", g_simulator.isAvailable()).detail("RemoveViaClear", removeViaClear);
+			for (auto& killProcess : killProcArray) {
+				if (g_simulator.protectedAddresses.count(killProcess->address))
+					TraceEvent("RemoveAndKill", functionId).detail("Step", "NoKill Process").detail("Process", describe(*killProcess)).detail("failed", killProcess->failed).detail("rebooting", killProcess->rebooting).detail("ClusterAvailable", g_simulator.isAvailable()).detail("Protected", g_simulator.protectedAddresses.count(killProcess->address));
+				else if (removeViaClear) {
+					g_simulator.rebootProcess( killProcess, ISimulator::RebootProcessAndDelete);
+					TraceEvent("RemoveAndKill", functionId).detail("Step", "Clear Process").detail("Process", describe(*killProcess)).detail("failed", killProcess->failed).detail("rebooting", killProcess->rebooting).detail("ClusterAvailable", g_simulator.isAvailable()).detail("Protected", g_simulator.protectedAddresses.count(killProcess->address));
+				}
+/*
+				else {
+					g_simulator.killProcess( killProcess, ISimulator::KillInstantly );
+					TraceEvent("RemoveAndKill", functionId).detail("Step", "Kill Process").detail("Process", describe(*killProcess)).detail("failed", killProcess->failed).detail("rebooting", killProcess->rebooting).detail("ClusterAvailable", g_simulator.isAvailable()).detail("Protected", g_simulator.protectedAddresses.count(killProcess->address));
+				}
+*/
+			}
+		}
+		else {
+			std::set<Optional<Standalone<StringRef>>> zoneIds;
+			bool killedMachine;
+			for (auto& killProcess : killProcArray) {
+				zoneIds.insert(killProcess->locality.zoneId());
+			}
+			TraceEvent("RemoveAndKill", functionId).detail("Step", removeViaClear ? "ClearMachines" : "KillMachines").detail("Addresses", describe(killAddrs)).detail("Processes", killProcArray.size()).detail("Zones", zoneIds.size()).detail("ClusterAvailable", g_simulator.isAvailable());
+			for (auto& zoneId : zoneIds) {
+				killedMachine = g_simulator.killMachine( zoneId, removeViaClear ? ISimulator::RebootAndDelete : ISimulator::KillInstantly, removeViaClear);
+				TraceEvent(killedMachine ? SevInfo : SevWarn, "RemoveAndKill").detail("Step", removeViaClear ? "Clear Machine" : "Kill Machine").detailext("ZoneId", zoneId).detail(removeViaClear ? "Cleared" : "Killed", killedMachine).detail("ClusterAvailable", g_simulator.isAvailable());
+			}
+		}
+
+		return killProcArray;
+	}
+
+	ACTOR static Future<Void> removeAndKill( RemoveServersSafelyWorkload* self, Database cx, std::set<AddressExclusion> toKill, std::set<AddressExclusion>* pIncAddrs)
+	{
+		state UID functionId = g_nondeterministic_random->randomUniqueID();
+
+		// First clear the exclusion list and exclude the given list
+		TraceEvent("RemoveAndKill", functionId).detail("Step", "include all").detail("ClusterAvailable", g_simulator.isAvailable());
+		Void _ = wait( includeServers( cx, vector<AddressExclusion>(1) ) );
+		TraceEvent("RemoveAndKill", functionId).detail("Step", "included all").detail("ClusterAvailable", g_simulator.isAvailable());
+		// Reinclude the addresses that were excluded, if present
+		if (pIncAddrs) {
+			self->includeAddresses(*pIncAddrs);
+		}
+
+		state std::vector<ISimulator::ProcessInfo*>	killProcArray;
 		state std::vector<AddressExclusion>	toKillArray;

 		std::copy(toKill.begin(), toKill.end(), std::back_inserter(toKillArray));
-		killProcesses = self->getProcesses(toKill);
+		killProcArray = self->getProcesses(toKill);

-		TraceEvent("RemoveAndKill").detail("Step", "Activate Server Exclusion").detail("toKill", describe(toKill)).detail("Addresses", describe(toKillArray)).detail("ClusterAvailable", g_simulator.isAvailable());
+		TraceEvent("RemoveAndKill", functionId).detail("Step", "Activate Server Exclusion").detail("KillAddrs", toKill.size()).detail("KillProcs", killProcArray.size()).detail("MissingProcs", toKill.size()!=killProcArray.size()).detail("toKill", describe(toKill)).detail("Addresses", describe(toKillArray)).detail("ClusterAvailable", g_simulator.isAvailable());
 		Void _ = wait( excludeServers( cx, toKillArray ) );

 		// We need to skip at least the quorum change if there's nothing to kill, because there might not be enough servers left
 		// alive to do a coordinators auto (?)
 		if (toKill.size()) {
 			// Wait for removal to be safe
-			TraceEvent("RemoveAndKill").detail("Step", "Wait For Server Exclusion").detail("Addresses", describe(toKill)).detail("ClusterAvailable", g_simulator.isAvailable());
+			TraceEvent("RemoveAndKill", functionId).detail("Step", "Wait For Server Exclusion").detail("Addresses", describe(toKill)).detail("ClusterAvailable", g_simulator.isAvailable());
 			Void _ = wait( waitForExcludedServers( cx, toKillArray ) );

-			TraceEvent("RemoveAndKill").detail("Step", "coordinators auto").detail("desiredCoordinators", g_simulator.desiredCoordinators).detail("ClusterAvailable", g_simulator.isAvailable());
+			TraceEvent("RemoveAndKill", functionId).detail("Step", "coordinators auto").detail("desiredCoordinators", g_simulator.desiredCoordinators).detail("ClusterAvailable", g_simulator.isAvailable());

 			// Setup the coordinators BEFORE the exclusion
 			// Otherwise, we may end up with NotEnoughMachinesForCoordinators
@ -349,38 +426,14 @@ struct RemoveServersSafelyWorkload : TestWorkload {
 					break;
 			}

-			// Reboot and delete or kill the servers
-			if( self->killProcesses ) {
-				TraceEvent("RemoveAndKill").detail("Step", removeClear ? "ClearProcesses" : "KillProcesses").detail("Addresses", describe(toKill))
-					.detail("Processes", killProcesses.size()).detail("ClusterAvailable", g_simulator.isAvailable());
-				for (auto& killProcess : killProcesses) {
-					TraceEvent("RemoveAndKill").detail("Step", removeClear ? "Clear Process" : "Kill Process").detail("Process", describe(*killProcess)).detail("ClusterAvailable", g_simulator.isAvailable()).detail("Protected", g_simulator.protectedAddresses.count(killProcess->address));
-//				ASSERT(g_simulator.protectedAddresses.count(killProcess->address) == 0);
-					if (removeClear)
-						g_simulator.rebootProcess( killProcess, ISimulator::RebootProcessAndDelete);
-					else
-						g_simulator.killProcess( killProcess, ISimulator::KillInstantly );
-				}
-			}
-			else {
-				std::set<Optional<Standalone<StringRef>>> zoneIds;
-				bool killedMachine;
-				for (auto& killProcess : killProcesses) {
-					zoneIds.insert(killProcess->locality.zoneId());
-				}
-				TraceEvent("RemoveAndKill").detail("Step", removeClear ? "ClearMachines" : "KillMachines").detail("Addresses", describe(toKill)).detail("Processes", killProcesses.size()).detail("Zones", zoneIds.size()).detail("ClusterAvailable", g_simulator.isAvailable());
-				for (auto& zoneId : zoneIds) {
-					killedMachine = g_simulator.killMachine( zoneId, removeClear ? ISimulator::RebootAndDelete : ISimulator::KillInstantly, removeClear ? true : false );
-					TraceEvent(killedMachine ? SevInfo : SevWarn, "RemoveAndKill").detail("Step", removeClear ? "Clear Machine" : "Kill Machine").detailext("ZoneId", zoneId).detail(removeClear ? "Cleared" : "Killed", killedMachine).detail("ClusterAvailable", g_simulator.isAvailable());
-				}
-			}
+			self->killAddresses(toKill);
 		}
 		else
 		{
-			TraceEvent("RemoveAndKill").detail("Step", "nothing to clear").detail("ClusterAvailable", g_simulator.isAvailable());
+			TraceEvent("RemoveAndKill", functionId).detail("Step", "nothing to clear").detail("ClusterAvailable", g_simulator.isAvailable());
 		}

-		TraceEvent("RemoveAndKill").detail("Step", "done").detail("ClusterAvailable", g_simulator.isAvailable());
+		TraceEvent("RemoveAndKill", functionId).detail("Step", "done").detail("ClusterAvailable", g_simulator.isAvailable());

 		return Void();
 	}
--- a/flow/TDMetric.actor.h
+++ b/flow/TDMetric.actor.h
@ -680,7 +680,7 @@ struct TimeDescriptor {
 };

 struct BaseMetric {
-	BaseMetric(MetricNameRef const &name) : metricName(name), pCollection(nullptr), registered(false) {
+	BaseMetric(MetricNameRef const &name) : metricName(name), pCollection(nullptr), registered(false), enabled(false) {
 		setConfig(false);
 	}
 	virtual ~BaseMetric() {
--- a/layers/directory/directory.py
+++ b/layers/directory/directory.py
@ -1,260 +0,0 @@
-#
-# directory.py
-#
-# This source file is part of the FoundationDB open source project
-#
-# Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# 
-#     http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from subspace import Subspace
-import fdb, fdb.tuple
-import random, struct
-
-fdb.api_version(100)
-
-#TODO: Error class
-
-class HighContentionAllocator (object):
-    def __init__(self, subspace):
-        self.counters = subspace[0]
-        self.recent = subspace[1]
-
-    @fdb.transactional
-    def allocate( self, tr ):
-        """Returns a byte string which
-            (1) has never and will never be returned by another call to HighContentionAllocator.allocate() on the same subspace
-            (2) is nearly as short as possible given the above"""
-
-        [(start, count)] = [ (self.counters.unpack(k)[0],struct.unpack("<q",v)[0]) for k,v in tr.snapshot.get_range( self.counters.range().start, self.counters.range().stop, limit=1, reverse=True ) ] or [ (0,0) ]
-
-        window = self._window_size(start)
-        if (count+1)*2 >= window:
-            # Advance the window
-            del tr[ self.counters : self.counters[start].key()+chr(0) ]
-            start += window
-            del tr[ self.recent : self.recent[start] ]
-            window = self._window_size(start)
-
-        # Increment the allocation count for the current window
-        tr.add( self.counters[start], struct.pack("<q", 1) )
-
-        while True:
-            # As of the snapshot we are reading from, the window is less than half full, so
-            # this should be expected 2 tries.  Under high contention (and when the window advances),
-            # there is an additional subsequent risk of conflict for this transaction.
-            candidate = random.randint( start, start+window )
-            if tr[ self.recent[candidate] ] == None:
-                tr[ self.recent[candidate] ] = ""
-                return fdb.tuple.pack( (candidate,) )
-
-    def _window_size(self, start):
-        # Larger window sizes are better for high contention, smaller for keeping the keys small.  But if
-        # there are lots of allocations the keys can't be too small.  So start small and scale up.  We don't
-        # want this to ever get *too* big because we have to store about window_size/2 recent items.
-        if start < 255: return 64
-        if start < 65535: return 1024
-        return 8192
-
-class DirectoryLayer (object):
-    def __init__(self, node_subspace = Subspace( rawPrefix="\xfe" ), content_subspace = Subspace() ):
-        self.content_subspace = content_subspace
-        self.node_subspace = node_subspace
-        # The root node is the one whose contents are the node subspace
-        self.root_node = self.node_subspace[ self.node_subspace.key() ]
-        self.allocator = HighContentionAllocator( self.root_node['hca'] )
-
-    @fdb.transactional
-    def create_or_open( self, tr, path, layer=None, prefix=None, allow_create=True, allow_open=True ):
-        """Opens the directory with the given path.
-        If the directory does not exist, it is created (creating parent directories if necessary).
-        If prefix is specified, the directory is created with the given physical prefix; otherwise a prefix is allocated automatically.
-        If layer is specified, it is checked against the layer of an existing directory or set as the layer of a new directory."""
-        if isinstance(path, str): path=(path,)
-        if not path: raise ValueError( "The root directory may not be opened." )  # Because it contains node metadata!
-        existing_node = self._find(tr, path)
-        if existing_node:
-            if not allow_open: raise ValueError("The directory already exists.")
-            existing_layer = tr[ existing_node['layer'].key() ]
-            if layer and existing_layer and existing_layer != layer:
-                raise ValueError( "The directory exists but was created with an incompatible layer." )
-            return self._contents_of_node(existing_node, path, existing_layer)
-        if not allow_create: raise ValueError("The directory does not exist.")
-
-        if prefix==None:
-            prefix = self.allocator.allocate(tr)
-
-        if not self._is_prefix_free(tr, prefix):
-            raise ValueError("The given prefix is already in use.")
-
-        if path[:-1]:
-            parent_node = self._node_with_prefix( self.create_or_open(tr, path[:-1], layer=None).key() )
-        else:
-            parent_node = self.root_node
-        #parent_node = self._find(tr, path[:-1])
-        if not parent_node:
-            print repr(path[:-1])
-            raise ValueError("The parent directory doesn't exist.")
-
-        node = self._node_with_prefix(prefix)
-        tr[ parent_node[self.SUBDIRS][ path[-1] ].key() ] = prefix
-        if layer: tr[ node['layer'].key() ] = layer
-
-        return self._contents_of_node(node, path, layer)
-
-    def open( self, db_or_tr, path, layer=None ):
-        """Opens the directory with the given path.
-        If the directory does not exist, an error is raised.
-        If layer is specified, and a different layer was specified when the directory was created, an error is raised."""
-        return self.create_or_open(db_or_tr, path, layer, allow_create=False)
-    def create( self, db_or_tr, path, layer=None, prefix=None ):
-        """Creates a directory with the given path (creating parent directories if necessary).
-        If the given directory already exists, an error is raised.
-        If prefix is specified, the directory is created with the given physical prefix; otherwise a prefix is allocated automatically.
-        If layer is specified, it is recorded with the directory and will be checked by future calls to open."""
-        return self.create_or_open(db_or_tr, path, layer, prefix, allow_open=False)
-
-    @fdb.transactional
-    def move( self, tr, old_path, new_path ):
-        """Moves the directory found at `old_path` to `new_path`.
-        There is no effect on the physical prefix of the given directory, or on clients that already have the directory open.
-        If the old directory does not exist, a directory already exists at `new_path`, or the parent directory of `new_path`
-        does not exist, an error is raised."""
-        if isinstance(old_path, str): old_path=(old_path,)
-        if isinstance(new_path, str): new_path=(new_path,)
-        if self._find(tr, new_path): raise ValueError( "The destination directory already exists.  Remove it first." )
-        old_node = self._find(tr, old_path)
-        if not old_node: raise ValueError("The source directory does not exist.")
-        parent_node = self._find(tr, new_path[:-1] )
-        if not parent_node: raise ValueError( "The parent of the destination directory does not exist.  Create it first." )
-        tr[ parent_node[self.SUBDIRS][ new_path[-1] ].key() ] = self._contents_of_node( old_node, None ).key()
-        self._remove_from_parent( tr, old_path )
-        return self._contents_of_node( old_node, new_path, tr[ old_node['layer'].key() ] )
-
-    @fdb.transactional
-    def remove( self, tr, path ):
-        """Removes the directory, its contents and all subdirectories transactionally.
-        Warning: Clients which have already opened the directory might still insert data into its contents after it is removed."""
-        if isinstance(path, str): path=(path,)
-        n = self._find(tr, path)
-        if not n: raise ValueError( "The directory doesn't exist." )
-        self._remove_recursive(tr, n)
-        self._remove_from_parent(tr, path)
-
-    @fdb.transactional
-    def list( self, tr, path=() ):
-        if isinstance(path, str): path=(path,)
-        node = self._find( tr, path)
-        if not node:
-            raise ValueError("The given directory does not exist.")
-        return [name for name, cnode in self._subdir_names_and_nodes(tr, node)]
-
-    ### IMPLEMENTATION ###
-    SUBDIRS=0
-
-    def _node_containing_key(self, tr, key):
-        # Right now this is only used for _is_prefix_free(), but if we add parent pointers to directory nodes,
-        # it could also be used to find a path based on a key
-        if key.startswith(self.node_subspace.key()):
-            return self.root_node
-        for k,v in tr.get_range( self.node_subspace.range( () ).start,
-                                 self.node_subspace.pack( (key,) )+"\x00",
-                                 reverse=True,
-                                 limit=1 ):
-            prev_prefix = self.node_subspace.unpack( k )[0]
-            if key.startswith(prev_prefix):
-                return Subspace( rawPrefix=k ) # self.node_subspace[prev_prefix]
-        return None
-
-    def _node_with_prefix( self, prefix ):
-        if prefix==None: return None
-        return self.node_subspace[prefix]
-
-    def _contents_of_node( self, node, path, layer=None ):
-        prefix = self.node_subspace.unpack( node.key() )[0]
-        return DirectorySubspace( path, prefix, self, layer )
-
-    def _find( self, tr, path ):
-        n = self.root_node
-        for name in path:
-            n = self._node_with_prefix( tr[ n[self.SUBDIRS][name].key() ] )
-            if n == None:
-                return None
-        return n
-
-    def _subdir_names_and_nodes( self, tr, node ):
-        sd = node[self.SUBDIRS]
-        for k,v in tr[sd.range(())]:
-            yield sd.unpack(k)[0], self._node_with_prefix( v )
-
-    def _remove_from_parent( self, tr, path ):
-        parent = self._find( tr, path[:-1] )
-        del tr[ parent[self.SUBDIRS][ path[-1] ].key() ]
-
-    def _remove_recursive( self, tr, node):
-        for name, sn in self._subdir_names_and_nodes(tr, node):
-            self._remove_recursive(tr, sn)
-        tr.clear_range_startswith( self._contents_of_node(node,None).key() )
-        del tr[ node.range(()) ]
-
-    def _is_prefix_free( self, tr, prefix ):
-        # Returns true if the given prefix does not intersect any currently allocated prefix
-        # (including the root node).  This means that it neither contains any other prefix nor
-        # is contained by any other prefix.
-        return prefix and not self._node_containing_key( tr, prefix ) and not len(list(tr.get_range( self.node_subspace.pack( (prefix,) ), self.node_subspace.pack( (strinc(prefix),) ), limit=1 )))
-
-directory = DirectoryLayer()
-
-class DirectorySubspace (Subspace):
-    # A DirectorySubspace represents the *contents* of a directory, but it also remembers
-    # the path it was opened with and offers convenience methods to operate on the directory
-    # at that path.
-    def __init__(self, path, prefix, directoryLayer=directory, layer=None):
-        Subspace.__init__(self, rawPrefix=prefix)
-        self.path = path
-        self.directoryLayer = directoryLayer
-        self.layer = layer
-
-    def __repr__(self):
-        return 'DirectorySubspace(' + repr(self.path) + ',' + repr(self.rawPrefix) + ')'
-
-    def check_layer(self, layer):
-        if layer and self.layer and layer!=self.layer:
-            raise ValueError("The directory was created with an incompatible layer.")
-
-    def create_or_open( self, db_or_tr, name_or_path, layer=None, prefix=None ):
-        if not isinstance( name_or_path, tuple ): name_or_path = (name_or_path,)
-        return self.directoryLayer.create_or_open( db_or_tr, self.path + name_or_path, layer, prefix )
-    def open( self, db_or_tr, name_or_path, layer=None ):
-        if not isinstance( name_or_path, tuple ): name_or_path = (name_or_path,)
-        return self.directoryLayer.open( db_or_tr, self.path + name_or_path, layer )
-    def create( self, db_or_tr, name_or_path, layer=None ):
-        if not isinstance( name_or_path, tuple ): name_or_path = (name_or_path,)
-        return self.directoryLayer.create( db_or_tr, self.path + name_or_path, layer )
-    def move_to( self, db_or_tr, new_path ):
-        return self.directoryLayer.moveTo( db_or_tr, self.path, new_path )
-    def remove( self, db_or_tr ):
-        return self.directoryLayer.remove( db_or_tr, self.path )
-    def list( self, db_or_tr ):
-        return self.directoryLayer.list( db_or_tr, self.path )
-
-def random_key():
-    return uuid.uuid4().bytes
-
-def strinc(key):
-    lastc = (ord(key[-1:]) + 1) % 256
-    if lastc:
-        return key[:-1] + chr(lastc)
-    else:
-        return strinc(key[:-1]) + chr(lastc)
--- a/layers/directory/dirtest2.py
+++ b/layers/directory/dirtest2.py
@ -1,79 +0,0 @@
-#
-# dirtest2.py
-#
-# This source file is part of the FoundationDB open source project
-#
-# Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# 
-#     http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import fdb, fdb.tuple
-fdb.api_version(23)
-
-from subspace import Subspace
-from directory import directory, DirectoryLayer
-
-def is_error(f):
-    """Call `f()` and report whether it raised.
-
-    Returns True if `f` raised an Exception, False if it returned normally.
-    """
-    try:
-        f()
-        return False
-    # Only ordinary errors count as a failure of `f`; KeyboardInterrupt and
-    # SystemExit must still be able to stop the test run.
-    except Exception:
-        return True
-
-# End-to-end exercise of the directory layer against a live database:
-# create, open, list, move and remove directories and check the results.
-db = fdb.open()
-# Start from an empty keyspace. WARNING: this clears every key in the database.
-del db[:]
-
-# Create a directory with a hand-picked prefix through the imported `directory`
-# object, then rebind `directory` to a layer built with content_subspace "\x01".
-print directory.create( db, 'evil', prefix="\x14" )
-directory = DirectoryLayer( content_subspace = Subspace(rawPrefix="\x01") )
-
-# Make a new directory
-stuff = directory.create( db, ('stuff',) )
-print 'stuff is in', stuff
-print 'stuff[0] is', fdb.tuple.unpack( stuff[0].key() )
-#assert stuff.key() == "\x01\x14"
-
-# Open it again
-stuff2 = directory.open( db, ('stuff',) )
-assert stuff2.key() == stuff.key()
-
-# Make another directory
-items = directory.create_or_open( db, ('items',) )
-print 'items are in', items
-#assert items.key() == "\x01\x15\x01"
-
-# List the root directory
-assert directory.list(db, ()) == ['evil','items','stuff']
-
-# Move everything into an 'app' directory
-app = directory.create( db, ('app',) )
-directory.move( db, ('stuff',), ('app','stuff') )
-directory.move( db, ('items',), ('app','items') )
-
-# Make a directory in a hard-coded place
-special = directory.create_or_open( db, ('app', 'special'), prefix="\x00" )
-assert special.key() == "\x00"
-
-assert directory.list(db, ()) == ['app','evil']
-assert directory.list(db, ("app",)) == ['items', 'special', 'stuff']
-
-# A moved directory must keep the key prefix it was created with.
-assert directory.open( db, ('app', 'stuff') ).key() == stuff.key()
-
-# Destroy the stuff directory
-directory.remove( db, ('app', 'stuff') )
-assert is_error( lambda: directory.open( db, ('app','stuff')) )
-assert directory.list(db, ("app",)) == ['items', 'special']
-
-# Test that items is still OK
-items2 = directory.create_or_open( db, ('app','items') )
-# Compare the re-opened handle with the original one; comparing `items` with
-# itself would pass no matter what create_or_open returned.
-assert items2.key() == items.key()
--- a/layers/directory/subspace.py
+++ b/layers/directory/subspace.py
@ -1,58 +0,0 @@
-#
-# subspace.py
-#
-# This source file is part of the FoundationDB open source project
-#
-# Copyright 2013-2018 Apple Inc. and the FoundationDB project authors
-# 
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-# 
-#     http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-###################################
-# This defines a Subspace of keys #
-###################################
-
-import fdb.tuple
-
-class Subspace (object):
-    """A set of keys sharing one raw byte prefix.
-
-    The prefix is `rawPrefix` followed by the tuple encoding of `prefixTuple`.
-    """
-
-    def __init__(self, prefixTuple=tuple(), rawPrefix=""):
-        encoded = fdb.tuple.pack(prefixTuple)
-        self.rawPrefix = rawPrefix + encoded
-
-    def __repr__(self):
-        return 'Subspace(rawPrefix=%r)' % (self.rawPrefix,)
-
-    def __getitem__(self, name):
-        # subspace[name] is the nested subspace for the one-element tuple (name,).
-        return Subspace( prefixTuple=(name,), rawPrefix=self.rawPrefix )
-
-    def key(self):
-        """Return the raw prefix itself."""
-        return self.rawPrefix
-
-    def pack(self, t = tuple()):
-        """Return the prefix followed by the tuple encoding of `t`."""
-        encoded = fdb.tuple.pack( t )
-        return self.rawPrefix + encoded
-
-    def unpack(self, key):
-        """Strip the prefix from `key` and decode the remainder as a tuple."""
-        prefix = self.rawPrefix
-        assert key.startswith(prefix)
-        remainder = key[len(prefix):]
-        return fdb.tuple.unpack(remainder)
-
-    def range(self, t = tuple()):
-        """Return a slice whose bounds are fdb.tuple.range(t) shifted under the prefix."""
-        bounds = fdb.tuple.range( t )
-        begin = self.rawPrefix + bounds.start
-        end = self.rawPrefix + bounds.stop
-        return slice(begin, end)
-
-    def contains(self, key):
-        """Return True when `key` starts with this subspace's prefix."""
-        return key.startswith(self.rawPrefix)
-
-    def as_foundationdb_key(self):
-        # Same value as key().
-        return self.rawPrefix
-
-    def subspace(self, tuple):
-        """Return the nested subspace for `tuple` under this prefix."""
-        return Subspace( prefixTuple=tuple, rawPrefix=self.rawPrefix )
--- a/packaging/msi/FDBInstaller.wxs
+++ b/packaging/msi/FDBInstaller.wxs
@ -32,7 +32,7 @@

 <Wix xmlns='http://schemas.microsoft.com/wix/2006/wi'>
  <Product Name='$(var.Title)'
-           Id='{137B53CD-E95D-450D-B16C-268F2855EA13}'
+           Id='{06EE6C90-3838-4C25-95D6-A4716F8CE7D0}'
           UpgradeCode='{A95EA002-686E-4164-8356-C715B7F8B1C8}'
           Version='$(var.Version)'
           Manufacturer='$(var.Manufacturer)'
--- a/tests/python_tests/python_performance.py
+++ b/tests/python_tests/python_performance.py
@ -275,7 +275,7 @@ class PythonPerformance(PythonTest):

        for i in range(count):
            index = random.randint(0, self.key_count)
-            list(tr[self.key(index):self.key(index+1)])
+            list(tr.get_range(self.key(index), self.key(index+1), limit=2))

        return count / (time.time() - s)