From: Jan Beulich <jbeulich@suse.com>
Subject: evtchn: address races with evtchn_reset()

Neither d->evtchn_port_ops nor max_evtchns(d) may be used in an entirely
lock-less manner, as both may change due to a racing evtchn_reset(). In
the common case, at least one of the domain's event lock or the
per-channel lock needs to be held. In the specific case of inter-domain
sending via evtchn_send() and notify_via_xen_event_channel(), holding the
other side's per-channel lock is sufficient, as the channel can't change
state without both per-channel locks held. Without such a channel
changing state, evtchn_reset() can't complete successfully.
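
As a minimal sketch of the resulting pattern (example_notify() is a
hypothetical helper made up for illustration; the lock and the hooks are
the real ones used in the hunks below), a sender holds the target
channel's lock across the ->set_pending invocation:

  /* Illustration only: holding the per-channel lock across the hook
   * call is what prevents a racing evtchn_reset() from replacing
   * d->evtchn_port_ops underneath us. */
  static void example_notify(struct domain *d, evtchn_port_t port)
  {
      struct evtchn *chn = evtchn_from_port(d, port);
      unsigned long flags;

      spin_lock_irqsave(&chn->lock, flags);
      evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
      spin_unlock_irqrestore(&chn->lock, flags);
  }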

Lock-free accesses continue to be permitted for the shim (calling some
otherwise internal event channel functions), as this happens while the
domain is in effectively single-threaded mode. Special care also needs
taking for the shim's marking of in-use ports as ECS_RESERVED (allowing
use of such ports in the shim case is okay because switching into and
hence also out of FIFO mode is impossible there).

As a side effect, certain operations on Xen-bound event channels which
were mistakenly permitted so far (e.g. unmask or poll) will now be
refused.
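
For instance (a hypothetical guest-side sketch, using only the public
EVTCHNOP_set_priority interface), adjusting the priority of such a
Xen-bound port now fails:

  /* Illustration only: "port" is assumed to name a port with a Xen
   * consumer attached (xen_consumer != 0). */
  struct evtchn_set_priority sp = { .port = port, .priority = 0 };
  int rc = HYPERVISOR_event_channel_op(EVTCHNOP_set_priority, &sp);
  /* rc == -EACCES now, where the call could previously succeed. */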

This is part of XSA-343.

Reported-by: Julien Grall <jgrall@amazon.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Acked-by: Julien Grall <jgrall@amazon.com>
---
v9: Add arch_evtchn_is_special() to fix PV shim.
v8: Add BUILD_BUG_ON() in evtchn_usable().
v7: Add locking related comment ahead of struct evtchn_port_ops.
v6: New.
---
TBD: I've been considering moving some of the wrappers from xen/event.h
     into event_channel.c (or even dropping them altogether), when they
     require external locking (e.g. evtchn_port_init() or
     evtchn_port_set_priority()). Does anyone have a strong opinion
     either way?

--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -2488,14 +2488,24 @@ static void dump_irqs(unsigned char key)
 
         for ( i = 0; i < action->nr_guests; )
         {
+            struct evtchn *evtchn;
+            unsigned int pending = 2, masked = 2;
+
             d = action->guest[i++];
             pirq = domain_irq_to_pirq(d, irq);
             info = pirq_info(d, pirq);
+            evtchn = evtchn_from_port(d, info->evtchn);
+            local_irq_disable();
+            if ( spin_trylock(&evtchn->lock) )
+            {
+                pending = evtchn_is_pending(d, evtchn);
+                masked = evtchn_is_masked(d, evtchn);
+                spin_unlock(&evtchn->lock);
+            }
+            local_irq_enable();
             printk("d%d:%3d(%c%c%c)%c",
-                   d->domain_id, pirq,
-                   evtchn_port_is_pending(d, info->evtchn) ? 'P' : '-',
-                   evtchn_port_is_masked(d, info->evtchn) ? 'M' : '-',
-                   info->masked ? 'M' : '-',
+                   d->domain_id, pirq, "-P?"[pending],
+                   "-M?"[masked], info->masked ? 'M' : '-',
                    i < action->nr_guests ? ',' : '\n');
         }
     }
--- a/xen/arch/x86/pv/shim.c
+++ b/xen/arch/x86/pv/shim.c
@@ -660,8 +660,11 @@ void pv_shim_inject_evtchn(unsigned int
     if ( port_is_valid(guest, port) )
     {
         struct evtchn *chn = evtchn_from_port(guest, port);
+        unsigned long flags;
 
+        spin_lock_irqsave(&chn->lock, flags);
         evtchn_port_set_pending(guest, chn->notify_vcpu_id, chn);
+        spin_unlock_irqrestore(&chn->lock, flags);
     }
 }
 
--- a/xen/common/event_2l.c
+++ b/xen/common/event_2l.c
@@ -63,8 +63,10 @@ static void evtchn_2l_unmask(struct doma
     }
 }
 
-static bool evtchn_2l_is_pending(const struct domain *d, evtchn_port_t port)
+static bool evtchn_2l_is_pending(const struct domain *d,
+                                 const struct evtchn *evtchn)
 {
+    evtchn_port_t port = evtchn->port;
     unsigned int max_ports = BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d);
 
     ASSERT(port < max_ports);
@@ -72,8 +74,10 @@ static bool evtchn_2l_is_pending(const s
             guest_test_bit(d, port, &shared_info(d, evtchn_pending)));
 }
 
-static bool evtchn_2l_is_masked(const struct domain *d, evtchn_port_t port)
+static bool evtchn_2l_is_masked(const struct domain *d,
+                                const struct evtchn *evtchn)
 {
+    evtchn_port_t port = evtchn->port;
     unsigned int max_ports = BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d);
 
     ASSERT(port < max_ports);
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -156,8 +156,9 @@ int evtchn_allocate_port(struct domain *
 
     if ( port_is_valid(d, port) )
     {
-        if ( evtchn_from_port(d, port)->state != ECS_FREE ||
-             evtchn_port_is_busy(d, port) )
+        const struct evtchn *chn = evtchn_from_port(d, port);
+
+        if ( chn->state != ECS_FREE || evtchn_is_busy(d, chn) )
             return -EBUSY;
     }
     else
@@ -774,6 +775,7 @@ void send_guest_vcpu_virq(struct vcpu *v
     unsigned long flags;
     int port;
     struct domain *d;
+    struct evtchn *chn;
 
     ASSERT(!virq_is_global(virq));
 
@@ -784,7 +786,10 @@ void send_guest_vcpu_virq(struct vcpu *v
         goto out;
 
     d = v->domain;
-    evtchn_port_set_pending(d, v->vcpu_id, evtchn_from_port(d, port));
+    chn = evtchn_from_port(d, port);
+    spin_lock(&chn->lock);
+    evtchn_port_set_pending(d, v->vcpu_id, chn);
+    spin_unlock(&chn->lock);
 
  out:
     spin_unlock_irqrestore(&v->virq_lock, flags);
@@ -813,7 +818,9 @@ void send_guest_global_virq(struct domai
         goto out;
 
     chn = evtchn_from_port(d, port);
+    spin_lock(&chn->lock);
     evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
+    spin_unlock(&chn->lock);
 
  out:
     spin_unlock_irqrestore(&v->virq_lock, flags);
@@ -823,6 +830,7 @@ void send_guest_pirq(struct domain *d, c
 {
     int port;
     struct evtchn *chn;
+    unsigned long flags;
 
     /*
      * PV guests: It should not be possible to race with __evtchn_close(). The
@@ -837,7 +845,9 @@ void send_guest_pirq(struct domain *d, c
     }
 
     chn = evtchn_from_port(d, port);
+    spin_lock_irqsave(&chn->lock, flags);
     evtchn_port_set_pending(d, chn->notify_vcpu_id, chn);
+    spin_unlock_irqrestore(&chn->lock, flags);
 }
 
 static struct domain *global_virq_handlers[NR_VIRQS] __read_mostly;
@@ -1034,12 +1044,15 @@ int evtchn_unmask(unsigned int port)
 {
     struct domain *d = current->domain;
     struct evtchn *evtchn;
+    unsigned long flags;
 
     if ( unlikely(!port_is_valid(d, port)) )
         return -EINVAL;
 
     evtchn = evtchn_from_port(d, port);
+    spin_lock_irqsave(&evtchn->lock, flags);
     evtchn_port_unmask(d, evtchn);
+    spin_unlock_irqrestore(&evtchn->lock, flags);
 
     return 0;
 }
@@ -1449,8 +1462,8 @@ static void domain_dump_evtchn_info(stru
 
         printk("    %4u [%d/%d/",
                port,
-               evtchn_port_is_pending(d, port),
-               evtchn_port_is_masked(d, port));
+               evtchn_is_pending(d, chn),
+               evtchn_is_masked(d, chn));
         evtchn_port_print_state(d, chn);
         printk("]: s=%d n=%d x=%d",
                chn->state, chn->notify_vcpu_id, chn->xen_consumer);
--- a/xen/common/event_fifo.c
+++ b/xen/common/event_fifo.c
@@ -296,23 +296,26 @@ static void evtchn_fifo_unmask(struct do
         evtchn_fifo_set_pending(v, evtchn);
 }
 
-static bool evtchn_fifo_is_pending(const struct domain *d, evtchn_port_t port)
+static bool evtchn_fifo_is_pending(const struct domain *d,
+                                   const struct evtchn *evtchn)
 {
-    const event_word_t *word = evtchn_fifo_word_from_port(d, port);
+    const event_word_t *word = evtchn_fifo_word_from_port(d, evtchn->port);
 
     return word && guest_test_bit(d, EVTCHN_FIFO_PENDING, word);
 }
 
-static bool_t evtchn_fifo_is_masked(const struct domain *d, evtchn_port_t port)
+static bool_t evtchn_fifo_is_masked(const struct domain *d,
+                                    const struct evtchn *evtchn)
 {
-    const event_word_t *word = evtchn_fifo_word_from_port(d, port);
+    const event_word_t *word = evtchn_fifo_word_from_port(d, evtchn->port);
 
     return !word || guest_test_bit(d, EVTCHN_FIFO_MASKED, word);
 }
 
-static bool_t evtchn_fifo_is_busy(const struct domain *d, evtchn_port_t port)
+static bool_t evtchn_fifo_is_busy(const struct domain *d,
+                                  const struct evtchn *evtchn)
 {
-    const event_word_t *word = evtchn_fifo_word_from_port(d, port);
+    const event_word_t *word = evtchn_fifo_word_from_port(d, evtchn->port);
 
     return word && guest_test_bit(d, EVTCHN_FIFO_LINKED, word);
 }
--- a/xen/include/asm-x86/event.h
+++ b/xen/include/asm-x86/event.h
@@ -47,4 +47,10 @@ static inline bool arch_virq_is_global(u
     return true;
 }
 
+#ifdef CONFIG_PV_SHIM
+# include <asm/pv/shim.h>
+# define arch_evtchn_is_special(chn) \
+    (pv_shim && (chn)->port && (chn)->state == ECS_RESERVED)
+#endif
+
 #endif
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -133,6 +133,24 @@ static inline struct evtchn *evtchn_from
     return bucket_from_port(d, p) + (p % EVTCHNS_PER_BUCKET);
 }
 
+/*
+ * "usable" as in "by a guest", i.e. Xen consumed channels are assumed to be
+ * taken care of separately where used for Xen's internal purposes.
+ */
+static bool evtchn_usable(const struct evtchn *evtchn)
+{
+    if ( evtchn->xen_consumer )
+        return false;
+
+#ifdef arch_evtchn_is_special
+    if ( arch_evtchn_is_special(evtchn) )
+        return true;
+#endif
+
+    BUILD_BUG_ON(ECS_FREE > ECS_RESERVED);
+    return evtchn->state > ECS_RESERVED;
+}
+
 /* Wait on a Xen-attached event channel. */
 #define wait_on_xen_event_channel(port, condition)              \
     do {                                                        \
@@ -165,19 +183,24 @@ int evtchn_reset(struct domain *d);
 
 /*
  * Low-level event channel port ops.
+ *
+ * All hooks have to be called with a lock held which prevents the channel
+ * from changing state. This may be the domain event lock, the per-channel
+ * lock, or in the case of sending interdomain events also the other side's
+ * per-channel lock. Exceptions apply in certain cases for the PV shim.
 */
 struct evtchn_port_ops {
     void (*init)(struct domain *d, struct evtchn *evtchn);
     void (*set_pending)(struct vcpu *v, struct evtchn *evtchn);
     void (*clear_pending)(struct domain *d, struct evtchn *evtchn);
     void (*unmask)(struct domain *d, struct evtchn *evtchn);
-    bool (*is_pending)(const struct domain *d, evtchn_port_t port);
-    bool (*is_masked)(const struct domain *d, evtchn_port_t port);
+    bool (*is_pending)(const struct domain *d, const struct evtchn *evtchn);
+    bool (*is_masked)(const struct domain *d, const struct evtchn *evtchn);
     /*
      * Is the port unavailable because it's still being cleaned up
      * after being closed?
      */
-    bool (*is_busy)(const struct domain *d, evtchn_port_t port);
+    bool (*is_busy)(const struct domain *d, const struct evtchn *evtchn);
     int (*set_priority)(struct domain *d, struct evtchn *evtchn,
                         unsigned int priority);
     void (*print_state)(struct domain *d, const struct evtchn *evtchn);
@@ -193,38 +216,67 @@ static inline void evtchn_port_set_pendi
                                            unsigned int vcpu_id,
                                            struct evtchn *evtchn)
 {
-    d->evtchn_port_ops->set_pending(d->vcpu[vcpu_id], evtchn);
+    if ( evtchn_usable(evtchn) )
+        d->evtchn_port_ops->set_pending(d->vcpu[vcpu_id], evtchn);
 }
 
 static inline void evtchn_port_clear_pending(struct domain *d,
                                              struct evtchn *evtchn)
 {
-    d->evtchn_port_ops->clear_pending(d, evtchn);
+    if ( evtchn_usable(evtchn) )
+        d->evtchn_port_ops->clear_pending(d, evtchn);
 }
 
 static inline void evtchn_port_unmask(struct domain *d,
                                       struct evtchn *evtchn)
 {
-    d->evtchn_port_ops->unmask(d, evtchn);
+    if ( evtchn_usable(evtchn) )
+        d->evtchn_port_ops->unmask(d, evtchn);
 }
 
-static inline bool evtchn_port_is_pending(const struct domain *d,
-                                          evtchn_port_t port)
+static inline bool evtchn_is_pending(const struct domain *d,
+                                     const struct evtchn *evtchn)
 {
-    return d->evtchn_port_ops->is_pending(d, port);
+    return evtchn_usable(evtchn) && d->evtchn_port_ops->is_pending(d, evtchn);
 }
 
-static inline bool evtchn_port_is_masked(const struct domain *d,
-                                         evtchn_port_t port)
+static inline bool evtchn_port_is_pending(struct domain *d, evtchn_port_t port)
 {
-    return d->evtchn_port_ops->is_masked(d, port);
+    struct evtchn *evtchn = evtchn_from_port(d, port);
+    bool rc;
+    unsigned long flags;
+
+    spin_lock_irqsave(&evtchn->lock, flags);
+    rc = evtchn_is_pending(d, evtchn);
+    spin_unlock_irqrestore(&evtchn->lock, flags);
+
+    return rc;
+}
+
+static inline bool evtchn_is_masked(const struct domain *d,
+                                    const struct evtchn *evtchn)
+{
+    return !evtchn_usable(evtchn) || d->evtchn_port_ops->is_masked(d, evtchn);
+}
+
+static inline bool evtchn_port_is_masked(struct domain *d, evtchn_port_t port)
+{
+    struct evtchn *evtchn = evtchn_from_port(d, port);
+    bool rc;
+    unsigned long flags;
+
+    spin_lock_irqsave(&evtchn->lock, flags);
+    rc = evtchn_is_masked(d, evtchn);
+    spin_unlock_irqrestore(&evtchn->lock, flags);
+
+    return rc;
 }
 
-static inline bool evtchn_port_is_busy(const struct domain *d,
-                                       evtchn_port_t port)
+static inline bool evtchn_is_busy(const struct domain *d,
+                                  const struct evtchn *evtchn)
 {
     return d->evtchn_port_ops->is_busy &&
-           d->evtchn_port_ops->is_busy(d, port);
+           d->evtchn_port_ops->is_busy(d, evtchn);
 }
 
 static inline int evtchn_port_set_priority(struct domain *d,
@@ -233,6 +285,8 @@ static inline int evtchn_port_set_priori
 {
     if ( !d->evtchn_port_ops->set_priority )
         return -ENOSYS;
+    if ( !evtchn_usable(evtchn) )
+        return -EACCES;
     return d->evtchn_port_ops->set_priority(d, evtchn, priority);
 }