Fix race during process interruption

Summary:
The following situation was occuring in TestAttachResume:
- we did a "continue" from a breakpoint (which involves a private start-stop to step over the
  breakpoint)
- after receiving the stop-reply from the step-over, we issue a "detach" (which requires a
  process interrupt)
- at this moment, the public state is "running", private state is "about-to-be-stopped" (the
  stopped event was broadcast, but it was not received yet)
- StopForDestroyOrDetach (public thread) notes the public state is running, sends an interrupt
  request to the private thread
- private thread gets the eBroadcastBitInterrupt (before the eStateStopped message), and asks the
  process plugin to stop (via Halt())
- process plugin says it has nothing to do as the process is already stopped
- private thread shrugs and carries on. receives the stop event, restores the breakpoint and
  resumes the process.
- after a while, the public thread times out and says it failed to stop the process

This patch does the following:
- splits Halt() into two functions, private and public, their usage depends on the context
  - public Halt(): sends eBroadcastBitInterrupt to the private thread and waits for the Stop
    event
  - HaltPrivate(): asks the plugin to stop and makes a note that the halt was requested. When the
    next stop event comes it sets the interrupt flag on it.
- removes HijackPrivateProcessEvents(), as the only user (old Halt()) has gone away
- removes the m_currently_handling_event hack, as the new Halt() does not need it
- adds a use_run_lock parameter to public Halt() and WaitForProcessToStop(). This was needed
  because RunThreadPlan uses Halt() while holding the run lock and we don't want Halt() to take
  it away from him.

Reviewers: clayborg, jingham

Subscribers: lldb-commits

Differential Revision: http://reviews.llvm.org/D14989

llvm-svn: 254403
This commit is contained in:
Pavel Labath 2015-12-01 11:28:47 +00:00
parent 4aef7ef881
commit df55f522bd
4 changed files with 108 additions and 126 deletions

View File

@ -1355,6 +1355,7 @@ public:
Error
ResumeSynchronous (Stream *stream);
//------------------------------------------------------------------
/// Halts a running process.
///
@ -1367,12 +1368,15 @@ public:
/// @param[in] clear_thread_plans
/// If true, when the process stops, clear all thread plans.
///
/// @param[in] use_run_lock
/// Whether to release the run lock after the stop.
///
/// @return
/// Returns an error object. If the error is empty, the process is halted.
/// otherwise the halt has failed.
//------------------------------------------------------------------
Error
Halt (bool clear_thread_plans = false);
Halt (bool clear_thread_plans = false, bool use_run_lock = true);
//------------------------------------------------------------------
/// Detaches from a running or stopped process.
@ -1683,9 +1687,8 @@ public:
/// DoHalt must produce one and only one stop StateChanged event if it actually
/// stops the process. If the stop happens through some natural event (for
/// instance a SIGSTOP), then forwarding that event will do. Otherwise, you must
/// generate the event manually. Note also, the private event thread is stopped when
/// DoHalt is run to prevent the events generated while halting to trigger
/// other state changes before the halt is complete.
/// generate the event manually. This function is called from the context of the
/// private state thread.
///
/// @param[out] caused_stop
/// If true, then this Halt caused the stop, otherwise, the
@ -2861,12 +2864,16 @@ public:
// Returns the process state when it is stopped. If specified, event_sp_ptr
// is set to the event which triggered the stop. If wait_always = false,
// and the process is already stopped, this function returns immediately.
// If the process is hijacked and use_run_lock is true (the default), then this
// function releases the run lock after the stop. Setting use_run_lock to false
// will avoid this behavior.
lldb::StateType
WaitForProcessToStop(const TimeValue *timeout,
lldb::EventSP *event_sp_ptr = nullptr,
bool wait_always = true,
Listener *hijack_listener = nullptr,
Stream *stream = nullptr);
Stream *stream = nullptr,
bool use_run_lock = true);
uint32_t
GetIOHandlerID () const
@ -3281,12 +3288,6 @@ protected:
std::string m_exit_string;
};
bool
HijackPrivateProcessEvents (Listener *listener);
void
RestorePrivateProcessEvents ();
bool
PrivateStateThreadIsValid () const
{
@ -3372,7 +3373,6 @@ protected:
std::vector<PreResumeCallbackAndBaton> m_pre_resume_actions;
ProcessRunLock m_public_run_lock;
ProcessRunLock m_private_run_lock;
Predicate<bool> m_currently_handling_event; // This predicate is set in HandlePrivateEvent while all its business is being done.
ArchSpec::StopInfoOverrideCallbackType m_stop_info_override_callback;
bool m_currently_handling_do_on_removals;
bool m_resume_requested; // If m_currently_handling_event or m_currently_handling_do_on_removals are true, Resume will only request a resume, using this flag to check.
@ -3436,6 +3436,9 @@ protected:
void
HandlePrivateEvent (lldb::EventSP &event_sp);
Error
HaltPrivate();
lldb::StateType
WaitForProcessStopPrivate (const TimeValue *timeout, lldb::EventSP &event_sp);

View File

@ -57,7 +57,7 @@ class ExprCommandWithTimeoutsTestCase(TestBase):
frame = thread.GetFrameAtIndex(0)
value = frame.EvaluateExpression ("wait_a_while (50000)", options)
value = frame.EvaluateExpression ("wait_a_while (200000)", options)
self.assertTrue (value.IsValid())
self.assertFalse (value.GetError().Success())
@ -65,7 +65,7 @@ class ExprCommandWithTimeoutsTestCase(TestBase):
interp = self.dbg.GetCommandInterpreter()
result = lldb.SBCommandReturnObject()
return_value = interp.HandleCommand ("expr -t 100 -u true -- wait_a_while(50000)", result)
return_value = interp.HandleCommand ("expr -t 100 -u true -- wait_a_while(200000)", result)
self.assertTrue (return_value == lldb.eReturnStatusFailed)
# Okay, now do it again with long enough time outs:

View File

@ -20,7 +20,6 @@ class AttachResumeTestCase(TestBase):
@skipIfRemote
@expectedFailureFreeBSD('llvm.org/pr19310')
@expectedFailureWindows("llvm.org/pr24778")
@expectedFlakeyLinux('llvm.org/pr19310')
def test_attach_continue_interrupt_detach(self):
"""Test attach/continue/interrupt/detach"""
self.build()
@ -52,6 +51,9 @@ class AttachResumeTestCase(TestBase):
self.runCmd("process interrupt")
lldbutil.expect_state_changes(self, listener, [lldb.eStateStopped])
# Second interrupt should have no effect.
self.expect("process interrupt", patterns=["Process is not running"], error=True)
# check that this breakpoint is auto-cleared on detach (r204752)
self.runCmd("br set -f main.cpp -l %u" % (line_number('main.cpp', '// Set breakpoint here')))

View File

@ -759,7 +759,6 @@ Process::Process(lldb::TargetSP target_sp, Listener &listener, const UnixSignals
m_next_event_action_ap(),
m_public_run_lock (),
m_private_run_lock (),
m_currently_handling_event(false),
m_stop_info_override_callback (NULL),
m_finalizing (false),
m_finalize_called (false),
@ -992,7 +991,8 @@ Process::WaitForProcessToStop (const TimeValue *timeout,
EventSP *event_sp_ptr,
bool wait_always,
Listener *hijack_listener,
Stream *stream)
Stream *stream,
bool use_run_lock)
{
// We can't just wait for a "stopped" event, because the stopped event may have restarted the target.
// We have to actually check each event, and in the case of a stopped event check the restarted flag
@ -1019,7 +1019,7 @@ Process::WaitForProcessToStop (const TimeValue *timeout,
__FUNCTION__);
// We need to toggle the run lock as this won't get done in
// SetPublicState() if the process is hijacked.
if (hijack_listener)
if (hijack_listener && use_run_lock)
m_public_run_lock.SetStopped();
return state;
}
@ -1042,7 +1042,7 @@ Process::WaitForProcessToStop (const TimeValue *timeout,
case eStateUnloaded:
// We need to toggle the run lock as this won't get done in
// SetPublicState() if the process is hijacked.
if (hijack_listener)
if (hijack_listener && use_run_lock)
m_public_run_lock.SetStopped();
return state;
case eStateStopped:
@ -1052,7 +1052,7 @@ Process::WaitForProcessToStop (const TimeValue *timeout,
{
// We need to toggle the run lock as this won't get done in
// SetPublicState() if the process is hijacked.
if (hijack_listener)
if (hijack_listener && use_run_lock)
m_public_run_lock.SetStopped();
return state;
}
@ -1308,23 +1308,6 @@ Process::RestoreProcessEvents ()
RestoreBroadcaster();
}
bool
Process::HijackPrivateProcessEvents (Listener *listener)
{
if (listener != NULL)
{
return m_private_state_broadcaster.HijackBroadcaster(listener, eBroadcastBitStateChanged | eBroadcastBitInterrupt);
}
else
return false;
}
void
Process::RestorePrivateProcessEvents ()
{
m_private_state_broadcaster.RestoreBroadcaster();
}
StateType
Process::WaitForStateChangedEvents (const TimeValue *timeout, EventSP &event_sp, Listener *hijack_listener)
{
@ -3831,101 +3814,50 @@ Process::PrivateResume ()
}
Error
Process::Halt (bool clear_thread_plans)
Process::Halt (bool clear_thread_plans, bool use_run_lock)
{
if (! StateIsRunningState(m_public_state.GetValue()))
return Error("Process is not running.");
// Don't clear the m_clear_thread_plans_on_stop, only set it to true if
// in case it was already set and some thread plan logic calls halt on its
// own.
m_clear_thread_plans_on_stop |= clear_thread_plans;
// First make sure we aren't in the middle of handling an event, or we might restart. This is pretty weak, since
// we could just straightaway get another event. It just narrows the window...
m_currently_handling_event.WaitForValueEqualTo(false);
// Pause our private state thread so we can ensure no one else eats
// the stop event out from under us.
Listener halt_listener ("lldb.process.halt_listener");
HijackPrivateProcessEvents(&halt_listener);
HijackProcessEvents(&halt_listener);
EventSP event_sp;
Error error (WillHalt());
bool restored_process_events = false;
if (error.Success())
SendAsyncInterrupt();
if (m_public_state.GetValue() == eStateAttaching)
{
bool caused_stop = false;
// Ask the process subclass to actually halt our process
error = DoHalt(caused_stop);
if (error.Success())
{
if (m_public_state.GetValue() == eStateAttaching)
{
// Don't hijack and eat the eStateExited as the code that was doing
// the attach will be waiting for this event...
RestorePrivateProcessEvents();
restored_process_events = true;
SetExitStatus(SIGKILL, "Cancelled async attach.");
Destroy (false);
}
else
{
// If "caused_stop" is true, then DoHalt stopped the process. If
// "caused_stop" is false, the process was already stopped.
// If the DoHalt caused the process to stop, then we want to catch
// this event and set the interrupted bool to true before we pass
// this along so clients know that the process was interrupted by
// a halt command.
if (caused_stop)
{
// Wait for 1 second for the process to stop.
TimeValue timeout_time;
timeout_time = TimeValue::Now();
timeout_time.OffsetWithSeconds(10);
bool got_event = halt_listener.WaitForEvent (&timeout_time, event_sp);
StateType state = ProcessEventData::GetStateFromEvent(event_sp.get());
if (!got_event || state == eStateInvalid)
{
// We timeout out and didn't get a stop event...
error.SetErrorStringWithFormat ("Halt timed out. State = %s", StateAsCString(GetState()));
}
else
{
if (StateIsStoppedState (state, false))
{
// We caused the process to interrupt itself, so mark this
// as such in the stop event so clients can tell an interrupted
// process from a natural stop
ProcessEventData::SetInterruptedInEvent (event_sp.get(), true);
}
else
{
Log *log(lldb_private::GetLogIfAllCategoriesSet (LIBLLDB_LOG_PROCESS));
if (log)
log->Printf("Process::Halt() failed to stop, state is: %s", StateAsCString(state));
error.SetErrorString ("Did not get stopped event after halt.");
}
}
}
DidHalt();
}
}
// Don't hijack and eat the eStateExited as the code that was doing
// the attach will be waiting for this event...
RestoreProcessEvents();
SetExitStatus(SIGKILL, "Cancelled async attach.");
Destroy (false);
return Error();
}
// Resume our private state thread before we post the event (if any)
if (!restored_process_events)
RestorePrivateProcessEvents();
// Post any event we might have consumed. If all goes well, we will have
// stopped the process, intercepted the event and set the interrupted
// bool in the event. Post it to the private event queue and that will end up
// correctly setting the state.
if (event_sp)
m_private_state_broadcaster.BroadcastEvent(event_sp);
// Wait for 10 second for the process to stop.
TimeValue timeout_time;
timeout_time = TimeValue::Now();
timeout_time.OffsetWithSeconds(10);
StateType state = WaitForProcessToStop(&timeout_time, &event_sp, true, &halt_listener,
nullptr, use_run_lock);
RestoreProcessEvents();
return error;
if (state == eStateInvalid || ! event_sp)
{
// We timed out and didn't get a stop event...
return Error("Halt timed out. State = %s", StateAsCString(GetState()));
}
BroadcastEvent(event_sp);
return Error();
}
Error
@ -4461,8 +4393,6 @@ Process::HandlePrivateEvent (EventSP &event_sp)
Log *log(lldb_private::GetLogIfAllCategoriesSet (LIBLLDB_LOG_PROCESS));
m_resume_requested = false;
m_currently_handling_event.SetValue(true, eBroadcastNever);
const StateType new_state = Process::ProcessEventData::GetStateFromEvent(event_sp.get());
// First check to see if anybody wants a shot at this event:
@ -4489,7 +4419,6 @@ Process::HandlePrivateEvent (EventSP &event_sp)
{
// FIXME: should cons up an exited event, and discard this one.
SetExitStatus(0, m_next_event_action_ap->GetExitString());
m_currently_handling_event.SetValue(false, eBroadcastAlways);
SetNextEventAction(NULL);
return;
}
@ -4579,7 +4508,22 @@ Process::HandlePrivateEvent (EventSP &event_sp)
StateAsCString (GetState ()));
}
}
m_currently_handling_event.SetValue(false, eBroadcastAlways);
}
Error
Process::HaltPrivate()
{
EventSP event_sp;
Error error (WillHalt());
if (error.Fail())
return error;
// Ask the process subclass to actually halt our process
bool caused_stop;
error = DoHalt(caused_stop);
DidHalt();
return error;
}
thread_result_t
@ -4602,6 +4546,7 @@ Process::RunPrivateStateThread (bool is_secondary_thread)
__FUNCTION__, static_cast<void*>(this), GetID());
bool exit_now = false;
bool interrupt_requested = false;
while (!exit_now)
{
EventSP event_sp;
@ -4647,7 +4592,16 @@ Process::RunPrivateStateThread (bool is_secondary_thread)
log->Printf ("Process::%s (arg = %p, pid = %" PRIu64 ") woke up with an interrupt - Halting.",
__FUNCTION__, static_cast<void*>(this),
GetID());
Halt();
Error error = HaltPrivate();
if (error.Fail() && log)
log->Printf ("Process::%s (arg = %p, pid = %" PRIu64 ") failed to halt the process: %s",
__FUNCTION__, static_cast<void*>(this),
GetID(), error.AsCString());
// Halt should generate a stopped event. Make a note of the fact that we were
// doing the interrupt, so we can set the interrupted flag after we receive the
// event. We deliberately set this to true even if HaltPrivate failed, so that we
// can interrupt on the next natural stop.
interrupt_requested = true;
}
continue;
}
@ -4662,6 +4616,23 @@ Process::RunPrivateStateThread (bool is_secondary_thread)
m_clear_thread_plans_on_stop = false;
m_thread_list.DiscardThreadPlans();
}
if (interrupt_requested)
{
if (StateIsStoppedState (internal_state, true))
{
// We requested the interrupt, so mark this as such in the stop event so
// clients can tell an interrupted process from a natural stop
ProcessEventData::SetInterruptedInEvent (event_sp.get(), true);
interrupt_requested = false;
}
else if (log)
{
log->Printf("Process::%s interrupt_requested, but a non-stopped state '%s' received.",
__FUNCTION__, StateAsCString(internal_state));
}
}
HandlePrivateEvent (event_sp);
}
@ -5744,7 +5715,9 @@ Process::RunThreadPlan (ExecutionContext &exe_ctx,
{
// This is probably an overabundance of caution, I don't think I should ever get a stopped & restarted
// event here. But if I do, the best thing is to Halt and then get out of here.
Halt();
const bool clear_thread_plans = false;
const bool use_run_lock = false;
Halt(clear_thread_plans, use_run_lock);
}
errors.Printf("Didn't get running event after initial resume, got %s instead.",
@ -5838,7 +5811,9 @@ Process::RunThreadPlan (ExecutionContext &exe_ctx,
bool keep_going = false;
if (event_sp->GetType() == eBroadcastBitInterrupt)
{
Halt();
const bool clear_thread_plans = false;
const bool use_run_lock = false;
Halt(clear_thread_plans, use_run_lock);
return_value = eExpressionInterrupted;
errors.Printf ("Execution halted by user interrupt.");
if (log)
@ -6015,7 +5990,9 @@ Process::RunThreadPlan (ExecutionContext &exe_ctx,
{
if (log)
log->Printf ("Process::RunThreadPlan(): Running Halt.");
halt_error = Halt();
const bool clear_thread_plans = false;
const bool use_run_lock = false;
Halt(clear_thread_plans, use_run_lock);
}
if (halt_error.Success())
{