Merge branch 'lorenzo/pci/hv'

* lorenzo/pci/hv:
  PCI: hv: Only queue new work items in hv_pci_devices_present() if necessary
  PCI: hv: Remove the bogus test in hv_eject_device_work()
  PCI: hv: Fix a comment typo in _hv_pcifront_read_config()
  PCI: hv: Fix 2 hang issues in hv_compose_msi_msg()
  PCI: hv: Serialize the present and eject work items
This commit is contained in:
Bjorn Helgaas 2018-04-04 13:28:50 -05:00 committed by Bjorn Helgaas
commit 84d4d6f882
1 changed files with 87 additions and 25 deletions

View File

@ -447,7 +447,6 @@ struct hv_pcibus_device {
spinlock_t device_list_lock; /* Protect lists below */
void __iomem *cfg_addr;
struct semaphore enum_sem;
struct list_head resources_for_children;
struct list_head children;
@ -461,6 +460,8 @@ struct hv_pcibus_device {
struct retarget_msi_interrupt retarget_msi_interrupt_params;
spinlock_t retarget_msi_interrupt_lock;
struct workqueue_struct *wq;
};
/*
@ -520,6 +521,8 @@ struct hv_pci_compl {
s32 completion_status;
};
static void hv_pci_onchannelcallback(void *context);
/**
* hv_pci_generic_compl() - Invoked for a completion packet
* @context: Set up by the sender of the packet.
@ -653,7 +656,7 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
break;
}
/*
* Make sure the write was done before we release the spinlock
* Make sure the read was done before we release the spinlock
* allowing consecutive reads/writes.
*/
mb();
@ -664,6 +667,31 @@ static void _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where,
}
}
static u16 hv_pcifront_get_vendor_id(struct hv_pci_dev *hpdev)
{
u16 ret;
unsigned long flags;
void __iomem *addr = hpdev->hbus->cfg_addr + CFG_PAGE_OFFSET +
PCI_VENDOR_ID;
spin_lock_irqsave(&hpdev->hbus->config_lock, flags);
/* Choose the function to be read. (See comment above) */
writel(hpdev->desc.win_slot.slot, hpdev->hbus->cfg_addr);
/* Make sure the function was chosen before we start reading. */
mb();
/* Read from that function's config space. */
ret = readw(addr);
/*
* mb() is not required here, because the spin_unlock_irqrestore()
* is a barrier.
*/
spin_unlock_irqrestore(&hpdev->hbus->config_lock, flags);
return ret;
}
/**
* _hv_pcifront_write_config() - Internal PCI config write
* @hpdev: The PCI driver's representation of the device
@ -1106,8 +1134,37 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
* Since this function is called with IRQ locks held, can't
* do normal wait for completion; instead poll.
*/
while (!try_wait_for_completion(&comp.comp_pkt.host_event))
while (!try_wait_for_completion(&comp.comp_pkt.host_event)) {
/* 0xFFFF means an invalid PCI VENDOR ID. */
if (hv_pcifront_get_vendor_id(hpdev) == 0xFFFF) {
dev_err_once(&hbus->hdev->device,
"the device has gone\n");
goto free_int_desc;
}
/*
* When the higher level interrupt code calls us with
* interrupts disabled, we must poll the channel by calling
* the channel callback directly when channel->target_cpu is
* the current CPU. When the higher level interrupt code
* calls us with interrupts enabled, let's add the
* local_bh_disable()/enable() to avoid a race.
*/
local_bh_disable();
if (hbus->hdev->channel->target_cpu == smp_processor_id())
hv_pci_onchannelcallback(hbus);
local_bh_enable();
if (hpdev->state == hv_pcichild_ejecting) {
dev_err_once(&hbus->hdev->device,
"the device is being ejected\n");
goto free_int_desc;
}
udelay(100);
}
if (comp.comp_pkt.completion_status < 0) {
dev_err(&hbus->hdev->device,
@ -1590,12 +1647,8 @@ static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
* It must also treat the omission of a previously observed device as
* notification that the device no longer exists.
*
* Note that this function is a work item, and it may not be
* invoked in the order that it was queued. Back to back
* updates of the list of present devices may involve queuing
* multiple work items, and this one may run before ones that
* were sent later. As such, this function only does something
* if is the last one in the queue.
* Note that this function is serialized with hv_eject_device_work(),
* because both are pushed to the ordered workqueue hbus->wq.
*/
static void pci_devices_present_work(struct work_struct *work)
{
@ -1616,11 +1669,6 @@ static void pci_devices_present_work(struct work_struct *work)
INIT_LIST_HEAD(&removed);
if (down_interruptible(&hbus->enum_sem)) {
put_hvpcibus(hbus);
return;
}
/* Pull this off the queue and process it if it was the last one. */
spin_lock_irqsave(&hbus->device_list_lock, flags);
while (!list_empty(&hbus->dr_list)) {
@ -1637,7 +1685,6 @@ static void pci_devices_present_work(struct work_struct *work)
spin_unlock_irqrestore(&hbus->device_list_lock, flags);
if (!dr) {
up(&hbus->enum_sem);
put_hvpcibus(hbus);
return;
}
@ -1724,7 +1771,6 @@ static void pci_devices_present_work(struct work_struct *work)
break;
}
up(&hbus->enum_sem);
put_hvpcibus(hbus);
kfree(dr);
}
@ -1743,6 +1789,7 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
struct hv_dr_state *dr;
struct hv_dr_work *dr_wrk;
unsigned long flags;
bool pending_dr;
dr_wrk = kzalloc(sizeof(*dr_wrk), GFP_NOWAIT);
if (!dr_wrk)
@ -1766,11 +1813,21 @@ static void hv_pci_devices_present(struct hv_pcibus_device *hbus,
}
spin_lock_irqsave(&hbus->device_list_lock, flags);
/*
* If pending_dr is true, we have already queued a work item,
* which will see the new dr. Otherwise, we need to
* queue a new work item.
*/
pending_dr = !list_empty(&hbus->dr_list);
list_add_tail(&dr->list_entry, &hbus->dr_list);
spin_unlock_irqrestore(&hbus->device_list_lock, flags);
get_hvpcibus(hbus);
schedule_work(&dr_wrk->wrk);
if (pending_dr) {
kfree(dr_wrk);
} else {
get_hvpcibus(hbus);
queue_work(hbus->wq, &dr_wrk->wrk);
}
}
/**
@ -1796,10 +1853,7 @@ static void hv_eject_device_work(struct work_struct *work)
hpdev = container_of(work, struct hv_pci_dev, wrk);
if (hpdev->state != hv_pcichild_ejecting) {
put_pcichild(hpdev, hv_pcidev_ref_pnp);
return;
}
WARN_ON(hpdev->state != hv_pcichild_ejecting);
/*
* Ejection can come before or after the PCI bus has been set up, so
@ -1848,7 +1902,7 @@ static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
get_pcichild(hpdev, hv_pcidev_ref_pnp);
INIT_WORK(&hpdev->wrk, hv_eject_device_work);
get_hvpcibus(hpdev->hbus);
schedule_work(&hpdev->wrk);
queue_work(hpdev->hbus->wq, &hpdev->wrk);
}
/**
@ -2461,13 +2515,18 @@ static int hv_pci_probe(struct hv_device *hdev,
spin_lock_init(&hbus->config_lock);
spin_lock_init(&hbus->device_list_lock);
spin_lock_init(&hbus->retarget_msi_interrupt_lock);
sema_init(&hbus->enum_sem, 1);
init_completion(&hbus->remove_event);
hbus->wq = alloc_ordered_workqueue("hv_pci_%x", 0,
hbus->sysdata.domain);
if (!hbus->wq) {
ret = -ENOMEM;
goto free_bus;
}
ret = vmbus_open(hdev->channel, pci_ring_size, pci_ring_size, NULL, 0,
hv_pci_onchannelcallback, hbus);
if (ret)
goto free_bus;
goto destroy_wq;
hv_set_drvdata(hdev, hbus);
@ -2536,6 +2595,8 @@ free_config:
hv_free_config_window(hbus);
close:
vmbus_close(hdev->channel);
destroy_wq:
destroy_workqueue(hbus->wq);
free_bus:
free_page((unsigned long)hbus);
return ret;
@ -2615,6 +2676,7 @@ static int hv_pci_remove(struct hv_device *hdev)
irq_domain_free_fwnode(hbus->sysdata.fwnode);
put_hvpcibus(hbus);
wait_for_completion(&hbus->remove_event);
destroy_workqueue(hbus->wq);
free_page((unsigned long)hbus);
return 0;
}