ipmi: move message error checking to avoid deadlock
V1->V2: in handle_one_rcv_msg, if data_size > 2, set requeue to zero and goto out instead of calling ipmi_free_msg. Kosuke Tatsukawa <tatsu@ab.jp.nec.com> In the source stack trace below, function set_need_watch tries to take out the same si_lock that was taken earlier by ipmi_thread. ipmi_thread() [drivers/char/ipmi/ipmi_si_intf.c:995] smi_event_handler() [drivers/char/ipmi/ipmi_si_intf.c:765] handle_transaction_done() [drivers/char/ipmi/ipmi_si_intf.c:555] deliver_recv_msg() [drivers/char/ipmi/ipmi_si_intf.c:283] ipmi_smi_msg_received() [drivers/char/ipmi/ipmi_msghandler.c:4503] intf_err_seq() [drivers/char/ipmi/ipmi_msghandler.c:1149] smi_remove_watch() [drivers/char/ipmi/ipmi_msghandler.c:999] set_need_watch() [drivers/char/ipmi/ipmi_si_intf.c:1066] Upstream commite1891cffd4
adds code to ipmi_smi_msg_received() to call smi_remove_watch() via intf_err_seq() and this seems to be causing the deadlock. commite1891cffd4
Author: Corey Minyard <cminyard@mvista.com> Date: Wed Oct 24 15:17:04 2018 -0500 ipmi: Make the smi watcher be disabled immediately when not needed The fix is to put all messages in the queue and move the message checking code out of ipmi_smi_msg_received and into handle_one_recv_msg, which processes the message checking after ipmi_thread releases its locks. Additionally,Kosuke Tatsukawa <tatsu@ab.jp.nec.com> reported that handle_new_recv_msgs calls ipmi_free_msg when handle_one_rcv_msg returns zero, so that the call to ipmi_free_msg in handle_one_rcv_msg introduced another panic when "ipmitool sensor list" was run in a loop. He submitted this part of the patch. +free_msg: + requeue = 0; + goto out; Reported by: Osamu Samukawa <osa-samukawa@tg.jp.nec.com> Characterized by: Kosuke Tatsukawa <tatsu@ab.jp.nec.com> Signed-off-by: Tony Camuso <tcamuso@redhat.com> Fixes:e1891cffd4
("ipmi: Make the smi watcher be disabled immediately when not needed") Cc: stable@vger.kernel.org # 5.1 Signed-off-by: Corey Minyard <cminyard@mvista.com>
This commit is contained in:
parent
c4436c9149
commit
383035211c
|
@ -4218,7 +4218,53 @@ static int handle_one_recv_msg(struct ipmi_smi *intf,
|
||||||
int chan;
|
int chan;
|
||||||
|
|
||||||
ipmi_debug_msg("Recv:", msg->rsp, msg->rsp_size);
|
ipmi_debug_msg("Recv:", msg->rsp, msg->rsp_size);
|
||||||
if (msg->rsp_size < 2) {
|
|
||||||
|
if ((msg->data_size >= 2)
|
||||||
|
&& (msg->data[0] == (IPMI_NETFN_APP_REQUEST << 2))
|
||||||
|
&& (msg->data[1] == IPMI_SEND_MSG_CMD)
|
||||||
|
&& (msg->user_data == NULL)) {
|
||||||
|
|
||||||
|
if (intf->in_shutdown)
|
||||||
|
goto free_msg;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is the local response to a command send, start
|
||||||
|
* the timer for these. The user_data will not be
|
||||||
|
* NULL if this is a response send, and we will let
|
||||||
|
* response sends just go through.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check for errors, if we get certain errors (ones
|
||||||
|
* that mean basically we can try again later), we
|
||||||
|
* ignore them and start the timer. Otherwise we
|
||||||
|
* report the error immediately.
|
||||||
|
*/
|
||||||
|
if ((msg->rsp_size >= 3) && (msg->rsp[2] != 0)
|
||||||
|
&& (msg->rsp[2] != IPMI_NODE_BUSY_ERR)
|
||||||
|
&& (msg->rsp[2] != IPMI_LOST_ARBITRATION_ERR)
|
||||||
|
&& (msg->rsp[2] != IPMI_BUS_ERR)
|
||||||
|
&& (msg->rsp[2] != IPMI_NAK_ON_WRITE_ERR)) {
|
||||||
|
int ch = msg->rsp[3] & 0xf;
|
||||||
|
struct ipmi_channel *chans;
|
||||||
|
|
||||||
|
/* Got an error sending the message, handle it. */
|
||||||
|
|
||||||
|
chans = READ_ONCE(intf->channel_list)->c;
|
||||||
|
if ((chans[ch].medium == IPMI_CHANNEL_MEDIUM_8023LAN)
|
||||||
|
|| (chans[ch].medium == IPMI_CHANNEL_MEDIUM_ASYNC))
|
||||||
|
ipmi_inc_stat(intf, sent_lan_command_errs);
|
||||||
|
else
|
||||||
|
ipmi_inc_stat(intf, sent_ipmb_command_errs);
|
||||||
|
intf_err_seq(intf, msg->msgid, msg->rsp[2]);
|
||||||
|
} else
|
||||||
|
/* The message was sent, start the timer. */
|
||||||
|
intf_start_seq_timer(intf, msg->msgid);
|
||||||
|
free_msg:
|
||||||
|
requeue = 0;
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
} else if (msg->rsp_size < 2) {
|
||||||
/* Message is too small to be correct. */
|
/* Message is too small to be correct. */
|
||||||
dev_warn(intf->si_dev,
|
dev_warn(intf->si_dev,
|
||||||
"BMC returned too small a message for netfn %x cmd %x, got %d bytes\n",
|
"BMC returned too small a message for netfn %x cmd %x, got %d bytes\n",
|
||||||
|
@ -4475,62 +4521,16 @@ void ipmi_smi_msg_received(struct ipmi_smi *intf,
|
||||||
unsigned long flags = 0; /* keep us warning-free. */
|
unsigned long flags = 0; /* keep us warning-free. */
|
||||||
int run_to_completion = intf->run_to_completion;
|
int run_to_completion = intf->run_to_completion;
|
||||||
|
|
||||||
if ((msg->data_size >= 2)
|
/*
|
||||||
&& (msg->data[0] == (IPMI_NETFN_APP_REQUEST << 2))
|
* To preserve message order, we keep a queue and deliver from
|
||||||
&& (msg->data[1] == IPMI_SEND_MSG_CMD)
|
* a tasklet.
|
||||||
&& (msg->user_data == NULL)) {
|
*/
|
||||||
|
if (!run_to_completion)
|
||||||
if (intf->in_shutdown)
|
spin_lock_irqsave(&intf->waiting_rcv_msgs_lock, flags);
|
||||||
goto free_msg;
|
list_add_tail(&msg->link, &intf->waiting_rcv_msgs);
|
||||||
|
if (!run_to_completion)
|
||||||
/*
|
spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock,
|
||||||
* This is the local response to a command send, start
|
flags);
|
||||||
* the timer for these. The user_data will not be
|
|
||||||
* NULL if this is a response send, and we will let
|
|
||||||
* response sends just go through.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Check for errors, if we get certain errors (ones
|
|
||||||
* that mean basically we can try again later), we
|
|
||||||
* ignore them and start the timer. Otherwise we
|
|
||||||
* report the error immediately.
|
|
||||||
*/
|
|
||||||
if ((msg->rsp_size >= 3) && (msg->rsp[2] != 0)
|
|
||||||
&& (msg->rsp[2] != IPMI_NODE_BUSY_ERR)
|
|
||||||
&& (msg->rsp[2] != IPMI_LOST_ARBITRATION_ERR)
|
|
||||||
&& (msg->rsp[2] != IPMI_BUS_ERR)
|
|
||||||
&& (msg->rsp[2] != IPMI_NAK_ON_WRITE_ERR)) {
|
|
||||||
int ch = msg->rsp[3] & 0xf;
|
|
||||||
struct ipmi_channel *chans;
|
|
||||||
|
|
||||||
/* Got an error sending the message, handle it. */
|
|
||||||
|
|
||||||
chans = READ_ONCE(intf->channel_list)->c;
|
|
||||||
if ((chans[ch].medium == IPMI_CHANNEL_MEDIUM_8023LAN)
|
|
||||||
|| (chans[ch].medium == IPMI_CHANNEL_MEDIUM_ASYNC))
|
|
||||||
ipmi_inc_stat(intf, sent_lan_command_errs);
|
|
||||||
else
|
|
||||||
ipmi_inc_stat(intf, sent_ipmb_command_errs);
|
|
||||||
intf_err_seq(intf, msg->msgid, msg->rsp[2]);
|
|
||||||
} else
|
|
||||||
/* The message was sent, start the timer. */
|
|
||||||
intf_start_seq_timer(intf, msg->msgid);
|
|
||||||
|
|
||||||
free_msg:
|
|
||||||
ipmi_free_smi_msg(msg);
|
|
||||||
} else {
|
|
||||||
/*
|
|
||||||
* To preserve message order, we keep a queue and deliver from
|
|
||||||
* a tasklet.
|
|
||||||
*/
|
|
||||||
if (!run_to_completion)
|
|
||||||
spin_lock_irqsave(&intf->waiting_rcv_msgs_lock, flags);
|
|
||||||
list_add_tail(&msg->link, &intf->waiting_rcv_msgs);
|
|
||||||
if (!run_to_completion)
|
|
||||||
spin_unlock_irqrestore(&intf->waiting_rcv_msgs_lock,
|
|
||||||
flags);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!run_to_completion)
|
if (!run_to_completion)
|
||||||
spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
|
spin_lock_irqsave(&intf->xmit_msgs_lock, flags);
|
||||||
|
|
Loading…
Reference in New Issue