powerpc/eeh: Fix stale cached primary bus
When a PE is created, its primary bus is cached to pe->bus. At a later
point, the cached primary bus is returned from eeh_pe_bus_get().
However, we could get a stale cached primary bus and run into a kernel
crash in one case: full hotplug as part of fenced PHB error recovery
releases all PCI busses under the PHB at unplugging time and recreates
them at plugging time. pe->bus still points to the PCI bus
that was released.
This adds another PE flag (EEH_PE_PRI_BUS) to represent the validity
of pe->bus. pe->bus is updated and the flag is set when the PE's first
child EEH device comes online. Before unplugging in full hotplug for
error recovery, the flag is cleared.
Fixes: 8cdb2833 ("powerpc/eeh: Trace PCI bus from PE")
Cc: stable@vger.kernel.org #v3.11+
Reported-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Reported-by: Pradipta Ghosh <pradghos@in.ibm.com>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Tested-by: Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
This commit is contained in:
parent
126df08c52
commit
05ba75f848
|
@ -81,6 +81,7 @@ struct pci_dn;
|
|||
#define EEH_PE_KEEP (1 << 8) /* Keep PE on hotplug */
|
||||
#define EEH_PE_CFG_RESTRICTED (1 << 9) /* Block config on error */
|
||||
#define EEH_PE_REMOVED (1 << 10) /* Removed permanently */
|
||||
#define EEH_PE_PRI_BUS (1 << 11) /* Cached primary bus */
|
||||
|
||||
struct eeh_pe {
|
||||
int type; /* PE type: PHB/Bus/Device */
|
||||
|
|
|
@ -564,6 +564,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
|
|||
*/
|
||||
eeh_pe_state_mark(pe, EEH_PE_KEEP);
|
||||
if (bus) {
|
||||
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
|
||||
pci_lock_rescan_remove();
|
||||
pcibios_remove_pci_devices(bus);
|
||||
pci_unlock_rescan_remove();
|
||||
|
@ -803,6 +804,7 @@ perm_error:
|
|||
* their PCI config any more.
|
||||
*/
|
||||
if (frozen_bus) {
|
||||
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
|
||||
eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
|
||||
|
||||
pci_lock_rescan_remove();
|
||||
|
@ -886,6 +888,7 @@ static void eeh_handle_special_event(void)
|
|||
continue;
|
||||
|
||||
/* Notify all devices to be down */
|
||||
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
|
||||
bus = eeh_pe_bus_get(phb_pe);
|
||||
eeh_pe_dev_traverse(pe,
|
||||
eeh_report_failure, NULL);
|
||||
|
|
|
@ -928,7 +928,7 @@ struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe)
|
|||
bus = pe->phb->bus;
|
||||
} else if (pe->type & EEH_PE_BUS ||
|
||||
pe->type & EEH_PE_DEVICE) {
|
||||
if (pe->bus) {
|
||||
if (pe->state & EEH_PE_PRI_BUS) {
|
||||
bus = pe->bus;
|
||||
goto out;
|
||||
}
|
||||
|
|
|
@ -444,9 +444,12 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void *data)
|
|||
* PCI devices of the PE are expected to be removed prior
|
||||
* to PE reset.
|
||||
*/
|
||||
if (!edev->pe->bus)
|
||||
if (!(edev->pe->state & EEH_PE_PRI_BUS)) {
|
||||
edev->pe->bus = pci_find_bus(hose->global_number,
|
||||
pdn->busno);
|
||||
if (edev->pe->bus)
|
||||
edev->pe->state |= EEH_PE_PRI_BUS;
|
||||
}
|
||||
|
||||
/*
|
||||
* Enable EEH explicitly so that we will do EEH check
|
||||
|
|
Loading…
Reference in New Issue