Merge branch 'lorenzo/pci/hv'

- fix Hyper-V bus registration failure caused by domain/serial number
    confusion (Sridhar Pitchai)

  - improve Hyper-V refcounting and coding style (Stephen Hemminger)

  - avoid potential Hyper-V hang waiting for a response that will never
    come (Dexuan Cui)

* lorenzo/pci/hv:
  PCI: hv: Do not wait forever on a device that has disappeared
  PCI: hv: Use list_for_each_entry()
  PCI: hv: Convert remove_lock to refcount
  PCI: hv: Remove unused reason for refcount handler
  PCI: hv: Make sure the bus domain is really unique
This commit is contained in:
Bjorn Helgaas 2018-06-06 16:10:35 -05:00
commit 741f8e7ecc
1 changed files with 71 additions and 91 deletions

View File

@ -433,7 +433,7 @@ enum hv_pcibus_state {
struct hv_pcibus_device { struct hv_pcibus_device {
struct pci_sysdata sysdata; struct pci_sysdata sysdata;
enum hv_pcibus_state state; enum hv_pcibus_state state;
atomic_t remove_lock; refcount_t remove_lock;
struct hv_device *hdev; struct hv_device *hdev;
resource_size_t low_mmio_space; resource_size_t low_mmio_space;
resource_size_t high_mmio_space; resource_size_t high_mmio_space;
@ -488,17 +488,6 @@ enum hv_pcichild_state {
hv_pcichild_maximum hv_pcichild_maximum
}; };
enum hv_pcidev_ref_reason {
hv_pcidev_ref_invalid = 0,
hv_pcidev_ref_initial,
hv_pcidev_ref_by_slot,
hv_pcidev_ref_packet,
hv_pcidev_ref_pnp,
hv_pcidev_ref_childlist,
hv_pcidev_irqdata,
hv_pcidev_ref_max
};
struct hv_pci_dev { struct hv_pci_dev {
/* List protected by pci_rescan_remove_lock */ /* List protected by pci_rescan_remove_lock */
struct list_head list_entry; struct list_head list_entry;
@ -548,14 +537,41 @@ static void hv_pci_generic_compl(void *context, struct pci_response *resp,
static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus, static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
u32 wslot); u32 wslot);
static void get_pcichild(struct hv_pci_dev *hv_pcidev,
enum hv_pcidev_ref_reason reason); static void get_pcichild(struct hv_pci_dev *hpdev)
static void put_pcichild(struct hv_pci_dev *hv_pcidev, {
enum hv_pcidev_ref_reason reason); refcount_inc(&hpdev->refs);
}
static void put_pcichild(struct hv_pci_dev *hpdev)
{
if (refcount_dec_and_test(&hpdev->refs))
kfree(hpdev);
}
static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus); static void get_hvpcibus(struct hv_pcibus_device *hv_pcibus);
static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus); static void put_hvpcibus(struct hv_pcibus_device *hv_pcibus);
/*
* There is no good way to get notified from vmbus_onoffer_rescind(),
* so let's use polling here, since this is not a hot path.
*/
static int wait_for_response(struct hv_device *hdev,
struct completion *comp)
{
while (true) {
if (hdev->channel->rescind) {
dev_warn_once(&hdev->device, "The device is gone.\n");
return -ENODEV;
}
if (wait_for_completion_timeout(comp, HZ / 10))
break;
}
return 0;
}
/** /**
* devfn_to_wslot() - Convert from Linux PCI slot to Windows * devfn_to_wslot() - Convert from Linux PCI slot to Windows
* @devfn: The Linux representation of PCI slot * @devfn: The Linux representation of PCI slot
@ -762,7 +778,7 @@ static int hv_pcifront_read_config(struct pci_bus *bus, unsigned int devfn,
_hv_pcifront_read_config(hpdev, where, size, val); _hv_pcifront_read_config(hpdev, where, size, val);
put_pcichild(hpdev, hv_pcidev_ref_by_slot); put_pcichild(hpdev);
return PCIBIOS_SUCCESSFUL; return PCIBIOS_SUCCESSFUL;
} }
@ -790,7 +806,7 @@ static int hv_pcifront_write_config(struct pci_bus *bus, unsigned int devfn,
_hv_pcifront_write_config(hpdev, where, size, val); _hv_pcifront_write_config(hpdev, where, size, val);
put_pcichild(hpdev, hv_pcidev_ref_by_slot); put_pcichild(hpdev);
return PCIBIOS_SUCCESSFUL; return PCIBIOS_SUCCESSFUL;
} }
@ -856,7 +872,7 @@ static void hv_msi_free(struct irq_domain *domain, struct msi_domain_info *info,
} }
hv_int_desc_free(hpdev, int_desc); hv_int_desc_free(hpdev, int_desc);
put_pcichild(hpdev, hv_pcidev_ref_by_slot); put_pcichild(hpdev);
} }
static int hv_set_affinity(struct irq_data *data, const struct cpumask *dest, static int hv_set_affinity(struct irq_data *data, const struct cpumask *dest,
@ -1186,13 +1202,13 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
msg->address_lo = comp.int_desc.address & 0xffffffff; msg->address_lo = comp.int_desc.address & 0xffffffff;
msg->data = comp.int_desc.data; msg->data = comp.int_desc.data;
put_pcichild(hpdev, hv_pcidev_ref_by_slot); put_pcichild(hpdev);
return; return;
free_int_desc: free_int_desc:
kfree(int_desc); kfree(int_desc);
drop_reference: drop_reference:
put_pcichild(hpdev, hv_pcidev_ref_by_slot); put_pcichild(hpdev);
return_null_message: return_null_message:
msg->address_hi = 0; msg->address_hi = 0;
msg->address_lo = 0; msg->address_lo = 0;
@ -1283,7 +1299,6 @@ static u64 get_bar_size(u64 bar_val)
*/ */
static void survey_child_resources(struct hv_pcibus_device *hbus) static void survey_child_resources(struct hv_pcibus_device *hbus)
{ {
struct list_head *iter;
struct hv_pci_dev *hpdev; struct hv_pci_dev *hpdev;
resource_size_t bar_size = 0; resource_size_t bar_size = 0;
unsigned long flags; unsigned long flags;
@ -1309,8 +1324,7 @@ static void survey_child_resources(struct hv_pcibus_device *hbus)
* for a child device are a power of 2 in size and aligned in memory, * for a child device are a power of 2 in size and aligned in memory,
* so it's sufficient to just add them up without tracking alignment. * so it's sufficient to just add them up without tracking alignment.
*/ */
list_for_each(iter, &hbus->children) { list_for_each_entry(hpdev, &hbus->children, list_entry) {
hpdev = container_of(iter, struct hv_pci_dev, list_entry);
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
if (hpdev->probed_bar[i] & PCI_BASE_ADDRESS_SPACE_IO) if (hpdev->probed_bar[i] & PCI_BASE_ADDRESS_SPACE_IO)
dev_err(&hbus->hdev->device, dev_err(&hbus->hdev->device,
@ -1363,7 +1377,6 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus)
resource_size_t low_base = 0; resource_size_t low_base = 0;
resource_size_t bar_size; resource_size_t bar_size;
struct hv_pci_dev *hpdev; struct hv_pci_dev *hpdev;
struct list_head *iter;
unsigned long flags; unsigned long flags;
u64 bar_val; u64 bar_val;
u32 command; u32 command;
@ -1385,9 +1398,7 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus)
/* Pick addresses for the BARs. */ /* Pick addresses for the BARs. */
do { do {
list_for_each(iter, &hbus->children) { list_for_each_entry(hpdev, &hbus->children, list_entry) {
hpdev = container_of(iter, struct hv_pci_dev,
list_entry);
for (i = 0; i < 6; i++) { for (i = 0; i < 6; i++) {
bar_val = hpdev->probed_bar[i]; bar_val = hpdev->probed_bar[i];
if (bar_val == 0) if (bar_val == 0)
@ -1508,19 +1519,6 @@ static void q_resource_requirements(void *context, struct pci_response *resp,
complete(&completion->host_event); complete(&completion->host_event);
} }
static void get_pcichild(struct hv_pci_dev *hpdev,
enum hv_pcidev_ref_reason reason)
{
refcount_inc(&hpdev->refs);
}
static void put_pcichild(struct hv_pci_dev *hpdev,
enum hv_pcidev_ref_reason reason)
{
if (refcount_dec_and_test(&hpdev->refs))
kfree(hpdev);
}
/** /**
* new_pcichild_device() - Create a new child device * new_pcichild_device() - Create a new child device
* @hbus: The internal struct tracking this root PCI bus. * @hbus: The internal struct tracking this root PCI bus.
@ -1568,24 +1566,14 @@ static struct hv_pci_dev *new_pcichild_device(struct hv_pcibus_device *hbus,
if (ret) if (ret)
goto error; goto error;
wait_for_completion(&comp_pkt.host_event); if (wait_for_response(hbus->hdev, &comp_pkt.host_event))
goto error;
hpdev->desc = *desc; hpdev->desc = *desc;
refcount_set(&hpdev->refs, 1); refcount_set(&hpdev->refs, 1);
get_pcichild(hpdev, hv_pcidev_ref_childlist); get_pcichild(hpdev);
spin_lock_irqsave(&hbus->device_list_lock, flags); spin_lock_irqsave(&hbus->device_list_lock, flags);
/*
* When a device is being added to the bus, we set the PCI domain
* number to be the device serial number, which is non-zero and
* unique on the same VM. The serial numbers start with 1, and
* increase by 1 for each device. So device names including this
* can have shorter names than based on the bus instance UUID.
* Only the first device serial number is used for domain, so the
* domain number will not change after the first device is added.
*/
if (list_empty(&hbus->children))
hbus->sysdata.domain = desc->ser;
list_add_tail(&hpdev->list_entry, &hbus->children); list_add_tail(&hpdev->list_entry, &hbus->children);
spin_unlock_irqrestore(&hbus->device_list_lock, flags); spin_unlock_irqrestore(&hbus->device_list_lock, flags);
return hpdev; return hpdev;
@ -1618,7 +1606,7 @@ static struct hv_pci_dev *get_pcichild_wslot(struct hv_pcibus_device *hbus,
list_for_each_entry(iter, &hbus->children, list_entry) { list_for_each_entry(iter, &hbus->children, list_entry) {
if (iter->desc.win_slot.slot == wslot) { if (iter->desc.win_slot.slot == wslot) {
hpdev = iter; hpdev = iter;
get_pcichild(hpdev, hv_pcidev_ref_by_slot); get_pcichild(hpdev);
break; break;
} }
} }
@ -1654,7 +1642,6 @@ static void pci_devices_present_work(struct work_struct *work)
{ {
u32 child_no; u32 child_no;
bool found; bool found;
struct list_head *iter;
struct pci_function_description *new_desc; struct pci_function_description *new_desc;
struct hv_pci_dev *hpdev; struct hv_pci_dev *hpdev;
struct hv_pcibus_device *hbus; struct hv_pcibus_device *hbus;
@ -1691,9 +1678,7 @@ static void pci_devices_present_work(struct work_struct *work)
/* First, mark all existing children as reported missing. */ /* First, mark all existing children as reported missing. */
spin_lock_irqsave(&hbus->device_list_lock, flags); spin_lock_irqsave(&hbus->device_list_lock, flags);
list_for_each(iter, &hbus->children) { list_for_each_entry(hpdev, &hbus->children, list_entry) {
hpdev = container_of(iter, struct hv_pci_dev,
list_entry);
hpdev->reported_missing = true; hpdev->reported_missing = true;
} }
spin_unlock_irqrestore(&hbus->device_list_lock, flags); spin_unlock_irqrestore(&hbus->device_list_lock, flags);
@ -1704,11 +1689,8 @@ static void pci_devices_present_work(struct work_struct *work)
new_desc = &dr->func[child_no]; new_desc = &dr->func[child_no];
spin_lock_irqsave(&hbus->device_list_lock, flags); spin_lock_irqsave(&hbus->device_list_lock, flags);
list_for_each(iter, &hbus->children) { list_for_each_entry(hpdev, &hbus->children, list_entry) {
hpdev = container_of(iter, struct hv_pci_dev, if ((hpdev->desc.win_slot.slot == new_desc->win_slot.slot) &&
list_entry);
if ((hpdev->desc.win_slot.slot ==
new_desc->win_slot.slot) &&
(hpdev->desc.v_id == new_desc->v_id) && (hpdev->desc.v_id == new_desc->v_id) &&
(hpdev->desc.d_id == new_desc->d_id) && (hpdev->desc.d_id == new_desc->d_id) &&
(hpdev->desc.ser == new_desc->ser)) { (hpdev->desc.ser == new_desc->ser)) {
@ -1730,12 +1712,10 @@ static void pci_devices_present_work(struct work_struct *work)
spin_lock_irqsave(&hbus->device_list_lock, flags); spin_lock_irqsave(&hbus->device_list_lock, flags);
do { do {
found = false; found = false;
list_for_each(iter, &hbus->children) { list_for_each_entry(hpdev, &hbus->children, list_entry) {
hpdev = container_of(iter, struct hv_pci_dev,
list_entry);
if (hpdev->reported_missing) { if (hpdev->reported_missing) {
found = true; found = true;
put_pcichild(hpdev, hv_pcidev_ref_childlist); put_pcichild(hpdev);
list_move_tail(&hpdev->list_entry, &removed); list_move_tail(&hpdev->list_entry, &removed);
break; break;
} }
@ -1748,7 +1728,7 @@ static void pci_devices_present_work(struct work_struct *work)
hpdev = list_first_entry(&removed, struct hv_pci_dev, hpdev = list_first_entry(&removed, struct hv_pci_dev,
list_entry); list_entry);
list_del(&hpdev->list_entry); list_del(&hpdev->list_entry);
put_pcichild(hpdev, hv_pcidev_ref_initial); put_pcichild(hpdev);
} }
switch (hbus->state) { switch (hbus->state) {
@ -1883,8 +1863,8 @@ static void hv_eject_device_work(struct work_struct *work)
sizeof(*ejct_pkt), (unsigned long)&ctxt.pkt, sizeof(*ejct_pkt), (unsigned long)&ctxt.pkt,
VM_PKT_DATA_INBAND, 0); VM_PKT_DATA_INBAND, 0);
put_pcichild(hpdev, hv_pcidev_ref_childlist); put_pcichild(hpdev);
put_pcichild(hpdev, hv_pcidev_ref_pnp); put_pcichild(hpdev);
put_hvpcibus(hpdev->hbus); put_hvpcibus(hpdev->hbus);
} }
@ -1899,7 +1879,7 @@ static void hv_eject_device_work(struct work_struct *work)
static void hv_pci_eject_device(struct hv_pci_dev *hpdev) static void hv_pci_eject_device(struct hv_pci_dev *hpdev)
{ {
hpdev->state = hv_pcichild_ejecting; hpdev->state = hv_pcichild_ejecting;
get_pcichild(hpdev, hv_pcidev_ref_pnp); get_pcichild(hpdev);
INIT_WORK(&hpdev->wrk, hv_eject_device_work); INIT_WORK(&hpdev->wrk, hv_eject_device_work);
get_hvpcibus(hpdev->hbus); get_hvpcibus(hpdev->hbus);
queue_work(hpdev->hbus->wq, &hpdev->wrk); queue_work(hpdev->hbus->wq, &hpdev->wrk);
@ -1999,8 +1979,7 @@ static void hv_pci_onchannelcallback(void *context)
dev_message->wslot.slot); dev_message->wslot.slot);
if (hpdev) { if (hpdev) {
hv_pci_eject_device(hpdev); hv_pci_eject_device(hpdev);
put_pcichild(hpdev, put_pcichild(hpdev);
hv_pcidev_ref_by_slot);
} }
break; break;
@ -2069,15 +2048,16 @@ static int hv_pci_protocol_negotiation(struct hv_device *hdev)
sizeof(struct pci_version_request), sizeof(struct pci_version_request),
(unsigned long)pkt, VM_PKT_DATA_INBAND, (unsigned long)pkt, VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (!ret)
ret = wait_for_response(hdev, &comp_pkt.host_event);
if (ret) { if (ret) {
dev_err(&hdev->device, dev_err(&hdev->device,
"PCI Pass-through VSP failed sending version reqquest: %#x", "PCI Pass-through VSP failed to request version: %d",
ret); ret);
goto exit; goto exit;
} }
wait_for_completion(&comp_pkt.host_event);
if (comp_pkt.completion_status >= 0) { if (comp_pkt.completion_status >= 0) {
pci_protocol_version = pci_protocol_versions[i]; pci_protocol_version = pci_protocol_versions[i];
dev_info(&hdev->device, dev_info(&hdev->device,
@ -2286,11 +2266,12 @@ static int hv_pci_enter_d0(struct hv_device *hdev)
ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry), ret = vmbus_sendpacket(hdev->channel, d0_entry, sizeof(*d0_entry),
(unsigned long)pkt, VM_PKT_DATA_INBAND, (unsigned long)pkt, VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (!ret)
ret = wait_for_response(hdev, &comp_pkt.host_event);
if (ret) if (ret)
goto exit; goto exit;
wait_for_completion(&comp_pkt.host_event);
if (comp_pkt.completion_status < 0) { if (comp_pkt.completion_status < 0) {
dev_err(&hdev->device, dev_err(&hdev->device,
"PCI Pass-through VSP failed D0 Entry with status %x\n", "PCI Pass-through VSP failed D0 Entry with status %x\n",
@ -2330,11 +2311,10 @@ static int hv_pci_query_relations(struct hv_device *hdev)
ret = vmbus_sendpacket(hdev->channel, &message, sizeof(message), ret = vmbus_sendpacket(hdev->channel, &message, sizeof(message),
0, VM_PKT_DATA_INBAND, 0); 0, VM_PKT_DATA_INBAND, 0);
if (ret) if (!ret)
return ret; ret = wait_for_response(hdev, &comp);
wait_for_completion(&comp); return ret;
return 0;
} }
/** /**
@ -2398,17 +2378,17 @@ static int hv_send_resources_allocated(struct hv_device *hdev)
PCI_RESOURCES_ASSIGNED2; PCI_RESOURCES_ASSIGNED2;
res_assigned2->wslot.slot = hpdev->desc.win_slot.slot; res_assigned2->wslot.slot = hpdev->desc.win_slot.slot;
} }
put_pcichild(hpdev, hv_pcidev_ref_by_slot); put_pcichild(hpdev);
ret = vmbus_sendpacket(hdev->channel, &pkt->message, ret = vmbus_sendpacket(hdev->channel, &pkt->message,
size_res, (unsigned long)pkt, size_res, (unsigned long)pkt,
VM_PKT_DATA_INBAND, VM_PKT_DATA_INBAND,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED); VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (!ret)
ret = wait_for_response(hdev, &comp_pkt.host_event);
if (ret) if (ret)
break; break;
wait_for_completion(&comp_pkt.host_event);
if (comp_pkt.completion_status < 0) { if (comp_pkt.completion_status < 0) {
ret = -EPROTO; ret = -EPROTO;
dev_err(&hdev->device, dev_err(&hdev->device,
@ -2446,7 +2426,7 @@ static int hv_send_resources_released(struct hv_device *hdev)
pkt.message_type.type = PCI_RESOURCES_RELEASED; pkt.message_type.type = PCI_RESOURCES_RELEASED;
pkt.wslot.slot = hpdev->desc.win_slot.slot; pkt.wslot.slot = hpdev->desc.win_slot.slot;
put_pcichild(hpdev, hv_pcidev_ref_by_slot); put_pcichild(hpdev);
ret = vmbus_sendpacket(hdev->channel, &pkt, sizeof(pkt), 0, ret = vmbus_sendpacket(hdev->channel, &pkt, sizeof(pkt), 0,
VM_PKT_DATA_INBAND, 0); VM_PKT_DATA_INBAND, 0);
@ -2459,12 +2439,12 @@ static int hv_send_resources_released(struct hv_device *hdev)
static void get_hvpcibus(struct hv_pcibus_device *hbus) static void get_hvpcibus(struct hv_pcibus_device *hbus)
{ {
atomic_inc(&hbus->remove_lock); refcount_inc(&hbus->remove_lock);
} }
static void put_hvpcibus(struct hv_pcibus_device *hbus) static void put_hvpcibus(struct hv_pcibus_device *hbus)
{ {
if (atomic_dec_and_test(&hbus->remove_lock)) if (refcount_dec_and_test(&hbus->remove_lock))
complete(&hbus->remove_event); complete(&hbus->remove_event);
} }
@ -2508,7 +2488,7 @@ static int hv_pci_probe(struct hv_device *hdev,
hdev->dev_instance.b[8] << 8; hdev->dev_instance.b[8] << 8;
hbus->hdev = hdev; hbus->hdev = hdev;
atomic_inc(&hbus->remove_lock); refcount_set(&hbus->remove_lock, 1);
INIT_LIST_HEAD(&hbus->children); INIT_LIST_HEAD(&hbus->children);
INIT_LIST_HEAD(&hbus->dr_list); INIT_LIST_HEAD(&hbus->dr_list);
INIT_LIST_HEAD(&hbus->resources_for_children); INIT_LIST_HEAD(&hbus->resources_for_children);