fix a race in the LLDB test suite results collection

The race boiled down to this:

If a test worker queue is able to run the test inferior and
clean up before the dosep.py listener socket is spun up, and
the worker queue is the last one (as would be the case when
there's only one test rerunning in the rerun queue), then
the test suite will exit the main loop before having a chance
to process any test events coming from the test inferior or
the worker queue job control.

I found this race to be far more likely on fast hardware.
Our Linux CI is one such example.  While it will show
up primarily during meta test events generated by
a worker thread when a test inferior times out or
exits with an exceptional exit (e.g. seg fault), it only
requires that the OS takes longer to hook up the
listener socket than it takes for the final test inferior
and worker thread to shut down.

See:
http://reviews.llvm.org/D19214

reviewed by:
Pavel Labath

llvm-svn: 266624
This commit is contained in:
Todd Fiala 2016-04-18 16:09:21 +00:00
parent ec4f40b6ee
commit 430309f13a
4 changed files with 32 additions and 6 deletions

View File

@ -109,13 +109,17 @@ def setup_global_variables(
global GET_WORKER_INDEX
GET_WORKER_INDEX = get_worker_index_use_pid
def report_test_failure(name, command, output):
def report_test_failure(name, command, output, timeout):
global output_lock
with output_lock:
if not (RESULTS_FORMATTER and RESULTS_FORMATTER.is_using_terminal()):
print(file=sys.stderr)
print(output, file=sys.stderr)
print("[%s FAILED]" % name, file=sys.stderr)
if timeout:
timeout_str = " (TIMEOUT)"
else:
timeout_str = ""
print("[%s FAILED]%s" % (name, timeout_str), file=sys.stderr)
print("Command invoked: %s" % ' '.join(command), file=sys.stderr)
update_progress(name)
@ -211,7 +215,7 @@ class DoTestProcessDriver(process_control.ProcessDriver):
# only stderr does.
report_test_pass(self.file_name, output[1])
else:
report_test_failure(self.file_name, command, output[1])
report_test_failure(self.file_name, command, output[1], was_timeout)
# Save off the results for the caller.
self.results = (

View File

@ -55,6 +55,14 @@ class UnpicklingForwardingReaderChannel(asyncore.dispatcher):
# unpickled results.
raise Exception("forwarding function must be set")
# Initiate all connections by sending an ack. This allows
# the initiator of the socket to wait for the ack, ensuring
# that this end is up and running (and therefore already
# in the async map).
ack_bytes = bytearray()
ack_bytes.append(chr(42))
file_object.send(ack_bytes)
def deserialize_payload(self):
"""Unpickles the collected input buffer bytes and forwards."""
if len(self.ibuffer) > 0:

View File

@ -3,19 +3,21 @@ from __future__ import print_function
import time
import lldbsuite.test.lldbtest as lldbtest
import lldbsuite.test.decorators as decorators
import rerun_base
class RerunTimeoutTestCase(rerun_base.RerunBaseTestCase):
@lldbtest.no_debug_info_test
@decorators.no_debug_info_test
def test_timeout_rerun_succeeds(self):
"""Tests that timeout logic kicks in and is picked up."""
"""Tests that the timeout logic kicks in and that this timeout is picked up."""
if not self.should_generate_issue():
# We pass this time.
return
# We time out this time.
while True:
# noinspection PyBroadException
try:
time.sleep(1)
except:

View File

@ -76,6 +76,18 @@ def create_results_formatter(config):
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
sock.connect(("localhost", port))
# Wait for the ack from the listener side.
# This is needed to prevent a race condition
# in the main dosep.py processing loop: we
# can't allow a worker queue thread to die
# that has outstanding messages to a listener
# socket before the asyncore listener socket
# gets spun up; otherwise,
# we lose the test result info.
read_bytes = sock.recv(1)
# print("\n** socket creation: received ack: {}".format(ord(read_bytes[0])), file=sys.stderr)
return (sock, lambda: socket_closer(sock))
default_formatter_name = None