- First round of vmbus hibernation support from Dexuan Cui.

- Removal of dependencies on PAGE_SIZE by Maya Nakamura.
 - Moving the hyper-v tools/ code into the tools build system by Andy
 Shevchenko.
 - hyper-v balloon cleanups by Dexuan Cui.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE4n5dijQDou9mhzu83qZv95d3LNwFAl2JanAACgkQ3qZv95d3
 LNwf2Q//Rtmclmnk+lXn1BhEEiXtzliSY7wcjpRR87WCzTIj6p2y//R2PuweQr+b
 dAlXKd6reK/c2Q/FnaQ5Gf1daNWvMh/39viMaesGZcvoSWlT60gDPdnelj6Z8sO0
 gWDRQV5d4AHIs2garl2zTzO+TS9/8Ot/YD0gVpX4wNAy7j9ZeEvNVoanGvQ88Et3
 pSQFKTNLVPOLlMOchm6HAhwBo5k6Y1LB3/RE/qqcX1sR8/CLp4DT0VhsMVA1DZXV
 hb3a0tEzn8fJxifx8/iguZr84SetXA/qTKKWDG59xAU2kijJrLyb3KXRE92GOzlA
 HzwOlnX0vWpTTthEzaLlvOgFKybTNBGMEQQJKmpI2PucC0iaHmYVH2dDxhBb2gX5
 uJGGr4arHjMDQYfppCVy/VXE5hCpKE29L/7kl+DsElM6NkgyJAfK7Crpuxs8KMME
 HwHi5UwTSvaKv1XKilWIDy4PpuzvGx5ftPMyBqgEH/aLK9aP1N+folCTUc01qCFU
 vz/Yjrs/p/U7T9P4rDCXMb+IPiCpr1puBsC/z0RJvsKUdKrzDzpXPLU8Wagv6UxS
 iHpZRR/ArUYByRp3N42+PR8i9uqrcOxtNgzphnRsBo3lzOAphVaQY0tPQkBPSMp2
 SQI2NP1G74l3WdszeeHi446v6S40ichN/FYsDuiGCs9YJY78mMs=
 =Dk9i
 -----END PGP SIGNATURE-----

Merge tag 'hyperv-next-signed' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux

Pull Hyper-V updates from Sasha Levin:

 - first round of vmbus hibernation support (Dexuan Cui)

 - remove dependencies on PAGE_SIZE (Maya Nakamura)

 - move the hyper-v tools/ code into the tools build system (Andy
   Shevchenko)

 - hyper-v balloon cleanups (Dexuan Cui)

* tag 'hyperv-next-signed' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux:
  Drivers: hv: vmbus: Resume after fixing up old primary channels
  Drivers: hv: vmbus: Suspend after cleaning up hv_sock and sub channels
  Drivers: hv: vmbus: Clean up hv_sock channels by force upon suspend
  Drivers: hv: vmbus: Suspend/resume the vmbus itself for hibernation
  Drivers: hv: vmbus: Ignore the offers when resuming from hibernation
  Drivers: hv: vmbus: Implement suspend/resume for VSC drivers for hibernation
  Drivers: hv: vmbus: Add a helper function is_sub_channel()
  Drivers: hv: vmbus: Suspend/resume the synic for hibernation
  Drivers: hv: vmbus: Break out synic enable and disable operations
  HID: hv: Remove dependencies on PAGE_SIZE for ring buffer
  Tools: hv: move to tools buildsystem
  hv_balloon: Reorganize the probe function
  hv_balloon: Use a static page for the balloon_up send buffer
This commit is contained in:
Linus Torvalds 2019-09-24 12:36:31 -07:00
commit af5a7e99cc
10 changed files with 613 additions and 134 deletions

View File

@ -104,8 +104,8 @@ struct synthhid_input_report {
#pragma pack(pop)
#define INPUTVSC_SEND_RING_BUFFER_SIZE (10*PAGE_SIZE)
#define INPUTVSC_RECV_RING_BUFFER_SIZE (10*PAGE_SIZE)
#define INPUTVSC_SEND_RING_BUFFER_SIZE (40 * 1024)
#define INPUTVSC_RECV_RING_BUFFER_SIZE (40 * 1024)
enum pipe_prot_msg_type {

View File

@ -407,6 +407,14 @@ void hv_process_channel_removal(struct vmbus_channel *channel)
cpumask_clear_cpu(channel->target_cpu,
&primary_channel->alloced_cpus_in_node);
/*
* Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
* the relid is invalidated; after hibernation, when the user-space app
* destroys the channel, the relid is INVALID_RELID, and in this case
* it's unnecessary and unsafe to release the old relid, since the same
* relid can refer to a completely different channel now.
*/
if (channel->offermsg.child_relid != INVALID_RELID)
vmbus_release_relid(channel->offermsg.child_relid);
free_channel(channel);
@ -545,6 +553,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
mutex_lock(&vmbus_connection.channel_mutex);
/* Remember the channels that should be cleaned up upon suspend. */
if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
/*
* Now that we have acquired the channel_mutex,
* we can release the potentially racing rescind thread.
@ -847,6 +859,67 @@ void vmbus_initiate_unload(bool crash)
vmbus_wait_for_unload();
}
static void check_ready_for_resume_event(void)
{
/*
* If all the old primary channels have been fixed up, then it's safe
* to resume.
*/
if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
complete(&vmbus_connection.ready_for_resume_event);
}
static void vmbus_setup_channel_state(struct vmbus_channel *channel,
struct vmbus_channel_offer_channel *offer)
{
/*
* Setup state for signalling the host.
*/
channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
if (vmbus_proto_version != VERSION_WS2008) {
channel->is_dedicated_interrupt =
(offer->is_dedicated_interrupt != 0);
channel->sig_event = offer->connection_id;
}
memcpy(&channel->offermsg, offer,
sizeof(struct vmbus_channel_offer_channel));
channel->monitor_grp = (u8)offer->monitorid / 32;
channel->monitor_bit = (u8)offer->monitorid % 32;
}
/*
 * find_primary_channel_by_offer - Look up the existing channel whose
 * interface instance GUID matches the one in the given offer.
 *
 * Sub-channel offers never match. Returns the matching channel, or NULL if
 * there is none. This is only used in the resume path of hibernation.
 */
static struct vmbus_channel *
find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
{
	const guid_t *wanted = &offer->offer.if_instance;
	struct vmbus_channel *found = NULL;
	struct vmbus_channel *cur;

	/* Ignore sub-channel offers. */
	if (offer->offer.sub_channel_index != 0)
		return NULL;

	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(cur, &vmbus_connection.chn_list, listentry) {
		if (!guid_equal(&cur->offermsg.offer.if_instance, wanted))
			continue;

		found = cur;
		break;
	}

	mutex_unlock(&vmbus_connection.channel_mutex);

	return found;
}
/*
* vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
*
@ -854,12 +927,58 @@ void vmbus_initiate_unload(bool crash)
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
struct vmbus_channel_offer_channel *offer;
struct vmbus_channel *newchannel;
struct vmbus_channel *oldchannel, *newchannel;
size_t offer_sz;
offer = (struct vmbus_channel_offer_channel *)hdr;
trace_vmbus_onoffer(offer);
oldchannel = find_primary_channel_by_offer(offer);
if (oldchannel != NULL) {
atomic_dec(&vmbus_connection.offer_in_progress);
/*
* We're resuming from hibernation: all the sub-channel and
* hv_sock channels we had before the hibernation should have
* been cleaned up, and now we must be seeing a re-offered
* primary channel that we had before the hibernation.
*/
WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
/* Fix up the relid. */
oldchannel->offermsg.child_relid = offer->child_relid;
offer_sz = sizeof(*offer);
if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) {
check_ready_for_resume_event();
return;
}
/*
* This is not an error, since the host can also change the
* other field(s) of the offer, e.g. on WS RS5 (Build 17763),
* the offer->connection_id of the Mellanox VF vmbus device
* can change when the host reoffers the device upon resume.
*/
pr_debug("vmbus offer changed: relid=%d\n",
offer->child_relid);
print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
16, 4, &oldchannel->offermsg, offer_sz,
false);
print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
16, 4, offer, offer_sz, false);
/* Fix up the old channel. */
vmbus_setup_channel_state(oldchannel, offer);
check_ready_for_resume_event();
return;
}
/* Allocate the channel object and save this offer. */
newchannel = alloc_channel();
if (!newchannel) {
@ -869,25 +988,21 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
return;
}
/*
* Setup state for signalling the host.
*/
newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
if (vmbus_proto_version != VERSION_WS2008) {
newchannel->is_dedicated_interrupt =
(offer->is_dedicated_interrupt != 0);
newchannel->sig_event = offer->connection_id;
}
memcpy(&newchannel->offermsg, offer,
sizeof(struct vmbus_channel_offer_channel));
newchannel->monitor_grp = (u8)offer->monitorid / 32;
newchannel->monitor_bit = (u8)offer->monitorid % 32;
vmbus_setup_channel_state(newchannel, offer);
vmbus_process_offer(newchannel);
}
static void check_ready_for_suspend_event(void)
{
/*
* If all the sub-channels or hv_sock channels have been cleaned up,
* then it's safe to suspend.
*/
if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
complete(&vmbus_connection.ready_for_suspend_event);
}
/*
* vmbus_onoffer_rescind - Rescind offer handler.
*
@ -898,6 +1013,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
struct vmbus_channel_rescind_offer *rescind;
struct vmbus_channel *channel;
struct device *dev;
bool clean_up_chan_for_suspend;
rescind = (struct vmbus_channel_rescind_offer *)hdr;
@ -937,6 +1053,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
return;
}
clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
is_sub_channel(channel);
/*
* Before setting channel->rescind in vmbus_rescind_cleanup(), we
* should make sure the channel callback is not running any more.
@ -962,6 +1080,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
if (channel->device_obj) {
if (channel->chn_rescind_callback) {
channel->chn_rescind_callback(channel);
if (clean_up_chan_for_suspend)
check_ready_for_suspend_event();
return;
}
/*
@ -994,6 +1116,11 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
}
mutex_unlock(&vmbus_connection.channel_mutex);
}
/* The "channel" may have been freed. Do not access it any longer. */
if (clean_up_chan_for_suspend)
check_ready_for_suspend_event();
}
void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)

View File

@ -26,6 +26,11 @@
struct vmbus_connection vmbus_connection = {
.conn_state = DISCONNECTED,
.next_gpadl_handle = ATOMIC_INIT(0xE1E10),
.ready_for_suspend_event= COMPLETION_INITIALIZER(
vmbus_connection.ready_for_suspend_event),
.ready_for_resume_event = COMPLETION_INITIALIZER(
vmbus_connection.ready_for_resume_event),
};
EXPORT_SYMBOL_GPL(vmbus_connection);
@ -59,8 +64,7 @@ static __u32 vmbus_get_next_version(__u32 current_version)
}
}
static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
__u32 version)
int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
{
int ret = 0;
unsigned int cur_cpu;

View File

@ -154,7 +154,7 @@ void hv_synic_free(void)
* retrieve the initialized message and event pages. Otherwise, we create and
* initialize the message and event pages.
*/
int hv_synic_init(unsigned int cpu)
void hv_synic_enable_regs(unsigned int cpu)
{
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
@ -196,6 +196,11 @@ int hv_synic_init(unsigned int cpu)
sctrl.enable = 1;
hv_set_synic_state(sctrl.as_uint64);
}
int hv_synic_init(unsigned int cpu)
{
hv_synic_enable_regs(cpu);
hv_stimer_init(cpu);
@ -205,20 +210,45 @@ int hv_synic_init(unsigned int cpu)
/*
* hv_synic_cleanup - Cleanup routine for hv_synic_init().
*/
int hv_synic_cleanup(unsigned int cpu)
void hv_synic_disable_regs(unsigned int cpu)
{
union hv_synic_sint shared_sint;
union hv_synic_simp simp;
union hv_synic_siefp siefp;
union hv_synic_scontrol sctrl;
hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
shared_sint.masked = 1;
/* Need to correctly cleanup in the case of SMP!!! */
/* Disable the interrupt */
hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
hv_get_simp(simp.as_uint64);
simp.simp_enabled = 0;
simp.base_simp_gpa = 0;
hv_set_simp(simp.as_uint64);
hv_get_siefp(siefp.as_uint64);
siefp.siefp_enabled = 0;
siefp.base_siefp_gpa = 0;
hv_set_siefp(siefp.as_uint64);
/* Disable the global synic bit */
hv_get_synic_state(sctrl.as_uint64);
sctrl.enable = 0;
hv_set_synic_state(sctrl.as_uint64);
}
int hv_synic_cleanup(unsigned int cpu)
{
struct vmbus_channel *channel, *sc;
bool channel_found = false;
unsigned long flags;
hv_get_synic_state(sctrl.as_uint64);
if (sctrl.enable != 1)
return -EFAULT;
/*
* Search for channels which are bound to the CPU we're about to
* cleanup. In case we find one and vmbus is still connected we need to
@ -249,29 +279,7 @@ int hv_synic_cleanup(unsigned int cpu)
hv_stimer_cleanup(cpu);
hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
shared_sint.masked = 1;
/* Need to correctly cleanup in the case of SMP!!! */
/* Disable the interrupt */
hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
hv_get_simp(simp.as_uint64);
simp.simp_enabled = 0;
simp.base_simp_gpa = 0;
hv_set_simp(simp.as_uint64);
hv_get_siefp(siefp.as_uint64);
siefp.siefp_enabled = 0;
siefp.base_siefp_gpa = 0;
hv_set_siefp(siefp.as_uint64);
/* Disable the global synic bit */
sctrl.enable = 0;
hv_set_synic_state(sctrl.as_uint64);
hv_synic_disable_regs(cpu);
return 0;
}

View File

@ -494,7 +494,7 @@ enum hv_dm_state {
static __u8 recv_buffer[PAGE_SIZE];
static __u8 *send_buffer;
static __u8 balloon_up_send_buffer[PAGE_SIZE];
#define PAGES_IN_2M 512
#define HA_CHUNK (32 * 1024)
@ -1292,8 +1292,8 @@ static void balloon_up(struct work_struct *dummy)
}
while (!done) {
bl_resp = (struct dm_balloon_response *)send_buffer;
memset(send_buffer, 0, PAGE_SIZE);
memset(balloon_up_send_buffer, 0, PAGE_SIZE);
bl_resp = (struct dm_balloon_response *)balloon_up_send_buffer;
bl_resp->hdr.type = DM_BALLOON_RESPONSE;
bl_resp->hdr.size = sizeof(struct dm_balloon_response);
bl_resp->more_pages = 1;
@ -1564,58 +1564,18 @@ static void balloon_onchannelcallback(void *context)
}
static int balloon_probe(struct hv_device *dev,
const struct hv_vmbus_device_id *dev_id)
static int balloon_connect_vsp(struct hv_device *dev)
{
int ret;
unsigned long t;
struct dm_version_request version_req;
struct dm_capabilities cap_msg;
#ifdef CONFIG_MEMORY_HOTPLUG
do_hot_add = hot_add;
#else
do_hot_add = false;
#endif
/*
* First allocate a send buffer.
*/
send_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!send_buffer)
return -ENOMEM;
unsigned long t;
int ret;
ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0,
balloon_onchannelcallback, dev);
if (ret)
goto probe_error0;
return ret;
dm_device.dev = dev;
dm_device.state = DM_INITIALIZING;
dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
init_completion(&dm_device.host_event);
init_completion(&dm_device.config_event);
INIT_LIST_HEAD(&dm_device.ha_region_list);
spin_lock_init(&dm_device.ha_lock);
INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
dm_device.host_specified_ha_region = false;
dm_device.thread =
kthread_run(dm_thread_func, &dm_device, "hv_balloon");
if (IS_ERR(dm_device.thread)) {
ret = PTR_ERR(dm_device.thread);
goto probe_error1;
}
#ifdef CONFIG_MEMORY_HOTPLUG
set_online_page_callback(&hv_online_page);
register_memory_notifier(&hv_memory_nb);
#endif
hv_set_drvdata(dev, &dm_device);
/*
* Initiate the hand shake with the host and negotiate
* a version that the host can support. We start with the
@ -1632,15 +1592,14 @@ static int balloon_probe(struct hv_device *dev,
ret = vmbus_sendpacket(dev->channel, &version_req,
sizeof(struct dm_version_request),
(unsigned long)NULL,
VM_PKT_DATA_INBAND, 0);
(unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
if (ret)
goto probe_error2;
goto out;
t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
if (t == 0) {
ret = -ETIMEDOUT;
goto probe_error2;
goto out;
}
/*
@ -1648,8 +1607,8 @@ static int balloon_probe(struct hv_device *dev,
* fail the probe function.
*/
if (dm_device.state == DM_INIT_ERROR) {
ret = -ETIMEDOUT;
goto probe_error2;
ret = -EPROTO;
goto out;
}
pr_info("Using Dynamic Memory protocol version %u.%u\n",
@ -1683,15 +1642,14 @@ static int balloon_probe(struct hv_device *dev,
ret = vmbus_sendpacket(dev->channel, &cap_msg,
sizeof(struct dm_capabilities),
(unsigned long)NULL,
VM_PKT_DATA_INBAND, 0);
(unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
if (ret)
goto probe_error2;
goto out;
t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
if (t == 0) {
ret = -ETIMEDOUT;
goto probe_error2;
goto out;
}
/*
@ -1699,25 +1657,65 @@ static int balloon_probe(struct hv_device *dev,
* fail the probe function.
*/
if (dm_device.state == DM_INIT_ERROR) {
ret = -ETIMEDOUT;
goto probe_error2;
ret = -EPROTO;
goto out;
}
return 0;
out:
vmbus_close(dev->channel);
return ret;
}
static int balloon_probe(struct hv_device *dev,
const struct hv_vmbus_device_id *dev_id)
{
int ret;
#ifdef CONFIG_MEMORY_HOTPLUG
do_hot_add = hot_add;
#else
do_hot_add = false;
#endif
dm_device.dev = dev;
dm_device.state = DM_INITIALIZING;
dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
init_completion(&dm_device.host_event);
init_completion(&dm_device.config_event);
INIT_LIST_HEAD(&dm_device.ha_region_list);
spin_lock_init(&dm_device.ha_lock);
INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
dm_device.host_specified_ha_region = false;
#ifdef CONFIG_MEMORY_HOTPLUG
set_online_page_callback(&hv_online_page);
register_memory_notifier(&hv_memory_nb);
#endif
hv_set_drvdata(dev, &dm_device);
ret = balloon_connect_vsp(dev);
if (ret != 0)
return ret;
dm_device.state = DM_INITIALIZED;
last_post_time = jiffies;
dm_device.thread =
kthread_run(dm_thread_func, &dm_device, "hv_balloon");
if (IS_ERR(dm_device.thread)) {
ret = PTR_ERR(dm_device.thread);
goto probe_error;
}
return 0;
probe_error2:
probe_error:
vmbus_close(dev->channel);
#ifdef CONFIG_MEMORY_HOTPLUG
unregister_memory_notifier(&hv_memory_nb);
restore_online_page_callback(&hv_online_page);
#endif
kthread_stop(dm_device.thread);
probe_error1:
vmbus_close(dev->channel);
probe_error0:
kfree(send_buffer);
return ret;
}
@ -1734,12 +1732,11 @@ static int balloon_remove(struct hv_device *dev)
cancel_work_sync(&dm->balloon_wrk.wrk);
cancel_work_sync(&dm->ha_wrk.wrk);
vmbus_close(dev->channel);
kthread_stop(dm->thread);
kfree(send_buffer);
vmbus_close(dev->channel);
#ifdef CONFIG_MEMORY_HOTPLUG
restore_online_page_callback(&hv_online_page);
unregister_memory_notifier(&hv_memory_nb);
restore_online_page_callback(&hv_online_page);
#endif
spin_lock_irqsave(&dm_device.ha_lock, flags);
list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) {

View File

@ -169,8 +169,10 @@ extern int hv_synic_alloc(void);
extern void hv_synic_free(void);
extern void hv_synic_enable_regs(unsigned int cpu);
extern int hv_synic_init(unsigned int cpu);
extern void hv_synic_disable_regs(unsigned int cpu);
extern int hv_synic_cleanup(unsigned int cpu);
/* Interface */
@ -256,6 +258,32 @@ struct vmbus_connection {
struct workqueue_struct *work_queue;
struct workqueue_struct *handle_primary_chan_wq;
struct workqueue_struct *handle_sub_chan_wq;
/*
* The number of sub-channels and hv_sock channels that should be
* cleaned up upon suspend: sub-channels will be re-created upon
* resume, and hv_sock channels should not survive suspend.
*/
atomic_t nr_chan_close_on_suspend;
/*
* vmbus_bus_suspend() waits for "nr_chan_close_on_suspend" to
* drop to zero.
*/
struct completion ready_for_suspend_event;
/*
* The number of primary channels that should be "fixed up"
* upon resume: these channels are re-offered upon resume, and some
* fields of the channel offers (i.e. child_relid and connection_id)
* can change, so the old offermsg must be fixed up, before the resume
* callbacks of the VSC drivers start to further touch the channels.
*/
atomic_t nr_chan_fixup_on_resume;
/*
* vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to
* drop to zero.
*/
struct completion ready_for_resume_event;
};
@ -270,6 +298,8 @@ struct vmbus_msginfo {
extern struct vmbus_connection vmbus_connection;
int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version);
static inline void vmbus_send_interrupt(u32 relid)
{
sync_set_bit(relid, vmbus_connection.send_int_page);

View File

@ -24,12 +24,14 @@
#include <linux/sched/task_stack.h>
#include <asm/mshyperv.h>
#include <linux/delay.h>
#include <linux/notifier.h>
#include <linux/ptrace.h>
#include <linux/screen_info.h>
#include <linux/kdebug.h>
#include <linux/efi.h>
#include <linux/random.h>
#include <linux/syscore_ops.h>
#include <clocksource/hyperv_timer.h>
#include "hyperv_vmbus.h"
@ -910,6 +912,43 @@ static void vmbus_shutdown(struct device *child_device)
drv->shutdown(dev);
}
/*
 * vmbus_suspend - Suspend a vmbus device
 *
 * Returns 0 if no driver is bound to the device, -EOPNOTSUPP if the bound
 * driver has no suspend callback, otherwise the callback's return value.
 */
static int vmbus_suspend(struct device *child_device)
{
	struct hv_device *hv_dev = device_to_hv_device(child_device);
	struct hv_driver *hv_drv;

	/* The device may not be attached yet */
	if (!child_device->driver)
		return 0;

	hv_drv = drv_to_hv_drv(child_device->driver);

	return hv_drv->suspend ? hv_drv->suspend(hv_dev) : -EOPNOTSUPP;
}
/*
 * vmbus_resume - Resume a vmbus device
 *
 * Returns 0 if no driver is bound to the device, -EOPNOTSUPP if the bound
 * driver has no resume callback, otherwise the callback's return value.
 */
static int vmbus_resume(struct device *child_device)
{
	struct hv_device *hv_dev = device_to_hv_device(child_device);
	struct hv_driver *hv_drv;

	/* The device may not be attached yet */
	if (!child_device->driver)
		return 0;

	hv_drv = drv_to_hv_drv(child_device->driver);

	return hv_drv->resume ? hv_drv->resume(hv_dev) : -EOPNOTSUPP;
}
/*
* vmbus_device_release - Final callback release of the vmbus child device
@ -925,6 +964,14 @@ static void vmbus_device_release(struct device *device)
kfree(hv_dev);
}
/*
* Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
* SET_SYSTEM_SLEEP_PM_OPS: see the comment before vmbus_bus_pm.
*/
static const struct dev_pm_ops vmbus_pm = {
SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_suspend, vmbus_resume)
};
/* The one and only one */
static struct bus_type hv_bus = {
.name = "vmbus",
@ -935,6 +982,7 @@ static struct bus_type hv_bus = {
.uevent = vmbus_uevent,
.dev_groups = vmbus_dev_groups,
.drv_groups = vmbus_drv_groups,
.pm = &vmbus_pm,
};
struct onmessage_work_context {
@ -1022,6 +1070,41 @@ msg_handled:
vmbus_signal_eom(msg, message_type);
}
/*
 * Queue a fake RESCIND_CHANNEL message for the given hv_sock channel so that
 * it gets cleaned up by force for hibernation: hv_sock connections can not
 * persist across hibernation.
 */
static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
{
	struct vmbus_channel_rescind_offer *fake_rescind;
	struct onmessage_work_context *work_ctx;

	WARN_ON(!is_hvsock_channel(channel));

	/*
	 * The context is small and __GFP_NOFAIL is used on purpose: a failed
	 * allocation would leave the state of the hv_sock connections in
	 * limbo.
	 */
	work_ctx = kzalloc(sizeof(*work_ctx), GFP_KERNEL | __GFP_NOFAIL);

	/*
	 * The message header is not really used by Linux so far; it is just
	 * filled with reasonable values conforming to the field definitions.
	 */
	work_ctx->msg.header.message_type = 1;
	work_ctx->msg.header.payload_size = sizeof(*fake_rescind);

	/* The payload fields, in contrast, are actually used by Linux. */
	fake_rescind =
		(struct vmbus_channel_rescind_offer *)work_ctx->msg.u.payload;
	fake_rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
	fake_rescind->child_relid = channel->offermsg.child_relid;

	INIT_WORK(&work_ctx->work, vmbus_onmessage_work);
	queue_work_on(vmbus_connection.connect_cpu,
		      vmbus_connection.work_queue, &work_ctx->work);
}
/*
* Direct callback for channels using other deferred processing
@ -2042,6 +2125,129 @@ acpi_walk_err:
return ret_val;
}
/*
 * vmbus_bus_suspend - Suspend the vmbus itself for hibernation.
 *
 * Waits for in-progress offers to finish, queues a forced rescind for every
 * hv_sock channel, waits for the sub-channels and hv_sock channels to be
 * cleaned up, invalidates the relid of every remaining channel, counts the
 * channels that must be fixed up on resume, and finally unloads the vmbus
 * connection. Always returns 0.
 */
static int vmbus_bus_suspend(struct device *dev)
{
struct vmbus_channel *channel, *sc;
unsigned long flags;
while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
/*
* We wait here until the completion of any channel
* offers that are currently in progress.
*/
msleep(1);
}
/* Queue a fake rescind message for every hv_sock channel. */
mutex_lock(&vmbus_connection.channel_mutex);
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
if (!is_hvsock_channel(channel))
continue;
vmbus_force_channel_rescinded(channel);
}
mutex_unlock(&vmbus_connection.channel_mutex);
/*
* Wait until all the sub-channels and hv_sock channels have been
* cleaned up. Sub-channels should be destroyed upon suspend, otherwise
* they would conflict with the new sub-channels that will be created
* in the resume path. hv_sock channels should also be destroyed, but
* a hv_sock channel of an established hv_sock connection can not be
* really destroyed since it may still be referenced by the userspace
* application, so we just force the hv_sock channel to be rescinded
* by vmbus_force_channel_rescinded(), and the userspace application
* will thoroughly destroy the channel after hibernation.
*
* Note: the counter nr_chan_close_on_suspend may never go above 0 if
* the VM has no sub-channel and no hv_sock channel, e.g. a 1-vCPU VM.
*/
if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
wait_for_completion(&vmbus_connection.ready_for_suspend_event);
/* The fix-up counter is expected to be zero before it is rebuilt below. */
WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0);
mutex_lock(&vmbus_connection.channel_mutex);
list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
/*
* Invalidate the field. Upon resume, vmbus_onoffer() will fix
* up the field, and the other fields (if necessary).
*/
channel->offermsg.child_relid = INVALID_RELID;
/* hv_sock channels are not counted for fix-up; they must be rescinded. */
if (is_hvsock_channel(channel)) {
if (!channel->rescind) {
pr_err("hv_sock channel not rescinded!\n");
WARN_ON_ONCE(1);
}
continue;
}
/* Any sub-channel still on the list at this point is reported. */
spin_lock_irqsave(&channel->lock, flags);
list_for_each_entry(sc, &channel->sc_list, sc_list) {
pr_err("Sub-channel not deleted!\n");
WARN_ON_ONCE(1);
}
spin_unlock_irqrestore(&channel->lock, flags);
/* This channel must be fixed up by a re-offer before resume completes. */
atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
}
mutex_unlock(&vmbus_connection.channel_mutex);
vmbus_initiate_unload(false);
vmbus_connection.conn_state = DISCONNECTED;
/* Reset the event for the next resume. */
reinit_completion(&vmbus_connection.ready_for_resume_event);
return 0;
}
static int vmbus_bus_resume(struct device *dev)
{
struct vmbus_channel_msginfo *msginfo;
size_t msgsize;
int ret;
/*
* We only use the 'vmbus_proto_version', which was in use before
* hibernation, to re-negotiate with the host.
*/
if (vmbus_proto_version == VERSION_INVAL ||
vmbus_proto_version == 0) {
pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
return -EINVAL;
}
msgsize = sizeof(*msginfo) +
sizeof(struct vmbus_channel_initiate_contact);
msginfo = kzalloc(msgsize, GFP_KERNEL);
if (msginfo == NULL)
return -ENOMEM;
ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);
kfree(msginfo);
if (ret != 0)
return ret;
WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
vmbus_request_offers();
wait_for_completion(&vmbus_connection.ready_for_resume_event);
/* Reset the event for the next suspend. */
reinit_completion(&vmbus_connection.ready_for_suspend_event);
return 0;
}
static const struct acpi_device_id vmbus_acpi_device_ids[] = {
{"VMBUS", 0},
{"VMBus", 0},
@ -2049,6 +2255,19 @@ static const struct acpi_device_id vmbus_acpi_device_ids[] = {
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
/*
* Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
* SET_SYSTEM_SLEEP_PM_OPS, otherwise NIC SR-IOV can not work, because the
* "pci_dev_pm_ops" uses the "noirq" callbacks: in the resume path, the
* pci "noirq" restore callback runs before "non-noirq" callbacks (see
* resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() ->
* dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's
* resume callback must also run via the "noirq" callbacks.
*/
static const struct dev_pm_ops vmbus_bus_pm = {
SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_bus_suspend, vmbus_bus_resume)
};
static struct acpi_driver vmbus_acpi_driver = {
.name = "vmbus",
.ids = vmbus_acpi_device_ids,
@ -2056,6 +2275,7 @@ static struct acpi_driver vmbus_acpi_driver = {
.add = vmbus_acpi_add,
.remove = vmbus_acpi_remove,
},
.drv.pm = &vmbus_bus_pm,
};
static void hv_kexec_handler(void)
@ -2086,6 +2306,47 @@ static void hv_crash_handler(struct pt_regs *regs)
hyperv_cleanup();
};
/*
 * hv_synic_suspend - syscore suspend callback: disable the synic registers
 * on CPU0. Always returns 0.
 */
static int hv_synic_suspend(void)
{
	/*
	 * By the time this runs, every non-boot CPU is already offline and
	 * its stimer has been unbound through hv_synic_cleanup() ->
	 * hv_stimer_cleanup() -> clockevents_unbind_device().
	 *
	 * This callback runs only on CPU0, with interrupts disabled. The
	 * stimer on CPU0 is intentionally left bound, i.e. we don't call
	 * hv_stimer_cleanup(0) here, for three reasons:
	 *
	 * 1) unbinding is unnecessary, since interrupts stay disabled
	 *    between syscore_suspend() and syscore_resume(): see
	 *    create_image() and resume_target_kernel();
	 *
	 * 2) the CPU0 stimer is shut down automatically later on, by
	 *    syscore_suspend() -> timekeeping_suspend() -> tick_suspend()
	 *    -> ... -> clockevents_shutdown() -> ... -> hv_ce_shutdown();
	 *
	 * 3) clockevents_unbind_device() may sleep, so calling it in this
	 *    interrupts-disabled context would trigger a warning.
	 */
	hv_synic_disable_regs(0);

	return 0;
}
/*
 * hv_synic_resume - syscore resume callback: re-enable the synic registers
 * on CPU0.
 */
static void hv_synic_resume(void)
{
	/*
	 * Only the registers need to be restored. hv_stimer_init(0) is not
	 * needed: hv_synic_suspend() leaves the CPU0 timer bound, and
	 * timekeeping_resume() re-enables it automatically.
	 */
	hv_synic_enable_regs(0);
}
/* The callbacks run only on CPU0, with irqs_disabled. */
static struct syscore_ops hv_synic_syscore_ops = {
.suspend = hv_synic_suspend,
.resume = hv_synic_resume,
};
static int __init hv_acpi_init(void)
{
int ret, t;
@ -2116,6 +2377,8 @@ static int __init hv_acpi_init(void)
hv_setup_kexec_handler(hv_kexec_handler);
hv_setup_crash_handler(hv_crash_handler);
register_syscore_ops(&hv_synic_syscore_ops);
return 0;
cleanup:
@ -2128,6 +2391,8 @@ static void __exit vmbus_exit(void)
{
int cpu;
unregister_syscore_ops(&hv_synic_syscore_ops);
hv_remove_kexec_handler();
hv_remove_crash_handler();
vmbus_connection.conn_state = DISCONNECTED;

View File

@ -245,7 +245,10 @@ struct vmbus_channel_offer {
} pipe;
} u;
/*
* The sub_channel_index is defined in win8.
* The sub_channel_index is defined in Win8: a value of zero means a
* primary channel and a value of non-zero means a sub-channel.
*
* Before Win8, the field is reserved, meaning it's always zero.
*/
u16 sub_channel_index;
u16 reserved3;
@ -423,6 +426,9 @@ enum vmbus_channel_message_type {
CHANNELMSG_COUNT
};
/* Hyper-V supports about 2048 channels, and the RELIDs start with 1. */
#define INVALID_RELID U32_MAX
struct vmbus_channel_message_header {
enum vmbus_channel_message_type msgtype;
u32 padding;
@ -934,6 +940,11 @@ static inline bool is_hvsock_channel(const struct vmbus_channel *c)
VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER);
}
/*
 * is_sub_channel - Tell whether the channel's stored offer carries a
 * non-zero sub_channel_index, i.e. whether it is a sub-channel rather than a
 * primary channel.
 */
static inline bool is_sub_channel(const struct vmbus_channel *c)
{
	return !!c->offermsg.offer.sub_channel_index;
}
static inline void set_channel_affinity_state(struct vmbus_channel *c,
enum hv_numa_policy policy)
{
@ -1149,6 +1160,9 @@ struct hv_driver {
int (*remove)(struct hv_device *);
void (*shutdown)(struct hv_device *);
int (*suspend)(struct hv_device *);
int (*resume)(struct hv_device *);
};
/* Base device object */

3
tools/hv/Build Normal file
View File

@ -0,0 +1,3 @@
hv_kvp_daemon-y += hv_kvp_daemon.o
hv_vss_daemon-y += hv_vss_daemon.o
hv_fcopy_daemon-y += hv_fcopy_daemon.o

View File

@ -1,28 +1,55 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for Hyper-V tools
WARNINGS = -Wall -Wextra
CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS)
CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
include ../scripts/Makefile.include
sbindir ?= /usr/sbin
libexecdir ?= /usr/libexec
sharedstatedir ?= /var/lib
ALL_PROGRAMS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
endif
# Do not use make's built-in rules
# (this improves performance and avoids hard-to-debug behaviour);
MAKEFLAGS += -r
override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
ALL_TARGETS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
ALL_SCRIPTS := hv_get_dhcp_info.sh hv_get_dns_info.sh hv_set_ifconfig.sh
all: $(ALL_PROGRAMS)
%: %.c
$(CC) $(CFLAGS) -o $@ $^
export srctree OUTPUT CC LD CFLAGS
include $(srctree)/tools/build/Makefile.include
HV_KVP_DAEMON_IN := $(OUTPUT)hv_kvp_daemon-in.o
$(HV_KVP_DAEMON_IN): FORCE
$(Q)$(MAKE) $(build)=hv_kvp_daemon
$(OUTPUT)hv_kvp_daemon: $(HV_KVP_DAEMON_IN)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
HV_VSS_DAEMON_IN := $(OUTPUT)hv_vss_daemon-in.o
$(HV_VSS_DAEMON_IN): FORCE
$(Q)$(MAKE) $(build)=hv_vss_daemon
$(OUTPUT)hv_vss_daemon: $(HV_VSS_DAEMON_IN)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
HV_FCOPY_DAEMON_IN := $(OUTPUT)hv_fcopy_daemon-in.o
$(HV_FCOPY_DAEMON_IN): FORCE
$(Q)$(MAKE) $(build)=hv_fcopy_daemon
$(OUTPUT)hv_fcopy_daemon: $(HV_FCOPY_DAEMON_IN)
$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
clean:
$(RM) hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
rm -f $(ALL_PROGRAMS)
find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
install: all
install: $(ALL_PROGRAMS)
install -d -m 755 $(DESTDIR)$(sbindir); \
install -d -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd; \
install -d -m 755 $(DESTDIR)$(sharedstatedir); \
@ -33,3 +60,7 @@ install: all
for script in $(ALL_SCRIPTS); do \
install $$script -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd/$${script%.sh}; \
done
FORCE:
.PHONY: all install clean FORCE prepare