[lit] Support sharding testsuites, for parallel execution.

Summary: This change equips lit.py with two new options, --num-shards=M and --run-shard=N (set by default from env vars LIT_NUM_SHARDS and LIT_RUN_SHARD). The options must be used together, and N must be in 1..M. Together these options affect only test selection: they partition the testsuite into M equal-sized "shards", then select only the Nth shard. They can be used in a cluster of test machines to achieve a very crude (static) form of parallelism, with minimal configuration work. Reviewers: modocache, ddunbar Reviewed By: ddunbar Subscribers: llvm-commits Differential Revision: https://reviews.llvm.org/D28789 llvm-svn: 292417
2017-01-18 18:12:20 +00:00 · 2017-01-18 18:12:20 +00:00 · ae5d7bb4f5
parent f86cca1a42
commit ae5d7bb4f5
3 changed files with 138 additions and 0 deletions
--- a/llvm/docs/CommandGuide/lit.rst
+++ b/llvm/docs/CommandGuide/lit.rst
@ -152,6 +152,23 @@ SELECTION OPTIONS

 Run the tests in a random order.

+.. option:: --num-shards=M
+
+ Divide the set of selected tests into ``M`` roughly equal-sized subsets or
+ "shards", and run only one of them.  Must be used with the
+ ``--run-shard=N`` option, which selects the shard to run. The environment
+ variable ``LIT_NUM_SHARDS`` can also be used in place of this
+ option. These two options provide a coarse mechanism for partitioning large
+ testsuites, for parallel execution on separate machines (say in a large
+ testing farm).
+
+.. option:: --run-shard=N
+
+ Select which shard to run, assuming the ``--num-shards=M`` option was
+ provided. The two options must be used together, and the value of ``N``
+ must be in the range ``1..M``. The environment variable
+ ``LIT_RUN_SHARD`` can also be used in place of this option.
+
 ADDITIONAL OPTIONS
 ------------------

--- a/llvm/utils/lit/lit/main.py
+++ b/llvm/utils/lit/lit/main.py
@ -259,6 +259,14 @@ def main_with_tmp(builtinParameters):
                     help=("Only run tests with paths matching the given "
                           "regular expression"),
                     action="store", default=None)
+    selection_group.add_argument("--num-shards", dest="numShards", metavar="M",
+                     help="Split testsuite into M pieces and only run one",
+                     action="store", type=int,
+                     default=os.environ.get("LIT_NUM_SHARDS"))
+    selection_group.add_argument("--run-shard", dest="runShard", metavar="N",
+                     help="Run shard #N of the testsuite",
+                     action="store", type=int,
+                     default=os.environ.get("LIT_RUN_SHARD"))

    debug_group = parser.add_argument_group("Debug and Experimental Options")
    debug_group.add_argument("--debug",
@ -399,6 +407,29 @@ def main_with_tmp(builtinParameters):
    else:
        run.tests.sort(key = lambda t: (not t.isEarlyTest(), t.getFullName()))

+    # Then optionally restrict our attention to a shard of the tests.
+    if (opts.numShards is not None) or (opts.runShard is not None):
+        if (opts.numShards is None) or (opts.runShard is None):
+            parser.error("--num-shards and --run-shard must be used together")
+        if opts.numShards <= 0:
+            parser.error("--num-shards must be positive")
+        if (opts.runShard < 1) or (opts.runShard > opts.numShards):
+            parser.error("--run-shard must be between 1 and --num-shards (inclusive)")
+        num_tests = len(run.tests)
+        # Note: user views tests and shard numbers counting from 1.
+        test_ixs = range(opts.runShard - 1, num_tests, opts.numShards)
+        run.tests = [run.tests[i] for i in test_ixs]
+        # Generate a preview of the first few test indices in the shard
+        # to accompany the arithmetic expression, for clarity.
+        preview_len = 3
+        ix_preview = ", ".join([str(i+1) for i in test_ixs[:preview_len]])
+        if len(test_ixs) > preview_len:
+            ix_preview += ", ..."
+        litConfig.note('Selecting shard %d/%d = size %d/%d = tests #(%d*k)+%d = [%s]' %
+                       (opts.runShard, opts.numShards,
+                        len(run.tests), num_tests,
+                        opts.numShards, opts.runShard, ix_preview))
+
    # Finally limit the number of tests, if desired.
    if opts.maxTests is not None:
        run.tests = run.tests[:opts.maxTests]
--- a/llvm/utils/lit/tests/selecting.py
+++ b/llvm/utils/lit/tests/selecting.py
@ -0,0 +1,90 @@
+# RUN: %{lit} %{inputs}/discovery | FileCheck --check-prefix=CHECK-BASIC %s
+# CHECK-BASIC: Testing: 5 tests
+
+
+# Check that regex-filtering works
+#
+# RUN: %{lit} --filter 'o[a-z]e' %{inputs}/discovery | FileCheck --check-prefix=CHECK-FILTER %s
+# CHECK-FILTER: Testing: 2 of 5 tests
+
+
+# Check that maximum counts work
+#
+# RUN: %{lit} --max-tests 3 %{inputs}/discovery | FileCheck --check-prefix=CHECK-MAX %s
+# CHECK-MAX: Testing: 3 of 5 tests
+
+
+# Check that sharding partitions the testsuite in a way that distributes the
+# rounding error nicely (i.e. 5/3 => 2 2 1, not 1 1 3 or whatever)
+#
+# RUN: %{lit} --num-shards 3 --run-shard 1 %{inputs}/discovery >%t.out 2>%t.err
+# RUN: FileCheck --check-prefix=CHECK-SHARD0-ERR < %t.err %s
+# RUN: FileCheck --check-prefix=CHECK-SHARD0-OUT < %t.out %s
+# CHECK-SHARD0-ERR: note: Selecting shard 1/3 = size 2/5 = tests #(3*k)+1 = [1, 4]
+# CHECK-SHARD0-OUT: Testing: 2 of 5 tests
+#
+# RUN: %{lit} --num-shards 3 --run-shard 2 %{inputs}/discovery >%t.out 2>%t.err
+# RUN: FileCheck --check-prefix=CHECK-SHARD1-ERR < %t.err %s
+# RUN: FileCheck --check-prefix=CHECK-SHARD1-OUT < %t.out %s
+# CHECK-SHARD1-ERR: note: Selecting shard 2/3 = size 2/5 = tests #(3*k)+2 = [2, 5]
+# CHECK-SHARD1-OUT: Testing: 2 of 5 tests
+#
+# RUN: %{lit} --num-shards 3 --run-shard 3 %{inputs}/discovery >%t.out 2>%t.err
+# RUN: FileCheck --check-prefix=CHECK-SHARD2-ERR < %t.err %s
+# RUN: FileCheck --check-prefix=CHECK-SHARD2-OUT < %t.out %s
+# CHECK-SHARD2-ERR: note: Selecting shard 3/3 = size 1/5 = tests #(3*k)+3 = [3]
+# CHECK-SHARD2-OUT: Testing: 1 of 5 tests
+
+
+# Check that sharding via env vars works.
+#
+# RUN: env LIT_NUM_SHARDS=3 LIT_RUN_SHARD=1 %{lit} %{inputs}/discovery >%t.out 2>%t.err
+# RUN: FileCheck --check-prefix=CHECK-SHARD0-ENV-ERR < %t.err %s
+# RUN: FileCheck --check-prefix=CHECK-SHARD0-ENV-OUT < %t.out %s
+# CHECK-SHARD0-ENV-ERR: note: Selecting shard 1/3 = size 2/5 = tests #(3*k)+1 = [1, 4]
+# CHECK-SHARD0-ENV-OUT: Testing: 2 of 5 tests
+#
+# RUN: env LIT_NUM_SHARDS=3 LIT_RUN_SHARD=2 %{lit} %{inputs}/discovery >%t.out 2>%t.err
+# RUN: FileCheck --check-prefix=CHECK-SHARD1-ENV-ERR < %t.err %s
+# RUN: FileCheck --check-prefix=CHECK-SHARD1-ENV-OUT < %t.out %s
+# CHECK-SHARD1-ENV-ERR: note: Selecting shard 2/3 = size 2/5 = tests #(3*k)+2 = [2, 5]
+# CHECK-SHARD1-ENV-OUT: Testing: 2 of 5 tests
+#
+# RUN: env LIT_NUM_SHARDS=3 LIT_RUN_SHARD=3 %{lit} %{inputs}/discovery >%t.out 2>%t.err
+# RUN: FileCheck --check-prefix=CHECK-SHARD2-ENV-ERR < %t.err %s
+# RUN: FileCheck --check-prefix=CHECK-SHARD2-ENV-OUT < %t.out %s
+# CHECK-SHARD2-ENV-ERR: note: Selecting shard 3/3 = size 1/5 = tests #(3*k)+3 = [3]
+# CHECK-SHARD2-ENV-OUT: Testing: 1 of 5 tests
+
+
+# Check that providing more shards than tests results in 1 test per shard
+# until we run out, then 0.
+#
+# RUN: %{lit} --num-shards 100 --run-shard 2 %{inputs}/discovery >%t.out 2>%t.err
+# RUN: FileCheck --check-prefix=CHECK-SHARD-BIG-ERR1 < %t.err %s
+# RUN: FileCheck --check-prefix=CHECK-SHARD-BIG-OUT1 < %t.out %s
+# CHECK-SHARD-BIG-ERR1: note: Selecting shard 2/100 = size 1/5 = tests #(100*k)+2 = [2]
+# CHECK-SHARD-BIG-OUT1: Testing: 1 of 5 tests
+#
+# RUN: %{lit} --num-shards 100 --run-shard 6 %{inputs}/discovery >%t.out 2>%t.err
+# RUN: FileCheck --check-prefix=CHECK-SHARD-BIG-ERR2 < %t.err %s
+# RUN: FileCheck --check-prefix=CHECK-SHARD-BIG-OUT2 < %t.out %s
+# CHECK-SHARD-BIG-ERR2: note: Selecting shard 6/100 = size 0/5 = tests #(100*k)+6 = []
+# CHECK-SHARD-BIG-OUT2: Testing: 0 of 5 tests
+#
+# RUN: %{lit} --num-shards 100 --run-shard 50 %{inputs}/discovery >%t.out 2>%t.err
+# RUN: FileCheck --check-prefix=CHECK-SHARD-BIG-ERR3 < %t.err %s
+# RUN: FileCheck --check-prefix=CHECK-SHARD-BIG-OUT3 < %t.out %s
+# CHECK-SHARD-BIG-ERR3: note: Selecting shard 50/100 = size 0/5 = tests #(100*k)+50 = []
+# CHECK-SHARD-BIG-OUT3: Testing: 0 of 5 tests
+
+
+# Check that range constraints are enforced
+#
+# RUN: not %{lit} --num-shards 0 --run-shard 2 %{inputs}/discovery >%t.out 2>%t.err
+# RUN: FileCheck --check-prefix=CHECK-SHARD-ERR < %t.err %s
+# CHECK-SHARD-ERR: error: --num-shards must be positive
+#
+# RUN: not %{lit} --num-shards 3 --run-shard 4 %{inputs}/discovery >%t.out 2>%t.err
+# RUN: FileCheck --check-prefix=CHECK-SHARD-ERR2 < %t.err %s
+# CHECK-SHARD-ERR2: error: --run-shard must be between 1 and --num-shards (inclusive)