xen/events: avoid removing an event channel while handling it

Today it can happen that an event channel is being removed from the
system while the event handling loop is active. This can lead to a
race resulting in crashes or WARN() splats when trying to access the
irq_info structure related to the event channel.

Fix this problem by using a rwlock taken as reader in the event
handling loop and as writer when deallocating the irq_info structure.

As the observed problem was a NULL dereference in evtchn_from_irq()
make this function more robust against races by testing the irq_info
pointer to be not NULL before dereferencing it.

And finally make all accesses to evtchn_to_irq[row][col] atomic ones
in order to avoid seeing partial updates of an array element in irq
handling. Note that irq handling can be entered only for event channels
which have been valid before, so any not populated row isn't a problem
in this regard, as rows are only ever added and never removed.

This is XSA-331.

Cc: stable@vger.kernel.org
Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Reported-by: Jinoh Kang <luke1337@theori.io>
Signed-off-by: Juergen Gross <jgross@suse.com>
Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Reviewed-by: Wei Liu <wl@xen.org>
This commit is contained in:
Juergen Gross 2020-09-07 15:47:27 +02:00
parent 32118f97f4
commit 073d0552ea
1 changed files with 36 additions and 5 deletions

View File

@ -33,6 +33,7 @@
#include <linux/slab.h>
#include <linux/irqnr.h>
#include <linux/pci.h>
#include <linux/spinlock.h>
#ifdef CONFIG_X86
#include <asm/desc.h>
@ -71,6 +72,23 @@ const struct evtchn_ops *evtchn_ops;
*/
static DEFINE_MUTEX(irq_mapping_update_lock);
/*
* Lock protecting event handling loop against removing event channels.
* Adding of event channels is no issue as the associated IRQ becomes active
* only after everything is setup (before request_[threaded_]irq() the handler
* can't be entered for an event, as the event channel will be unmasked only
* then).
*/
static DEFINE_RWLOCK(evtchn_rwlock);
/*
* Lock hierarchy:
*
* irq_mapping_update_lock
* evtchn_rwlock
* IRQ-desc lock
*/
static LIST_HEAD(xen_irq_list_head);
/* IRQ <-> VIRQ mapping. */
@ -105,7 +123,7 @@ static void clear_evtchn_to_irq_row(unsigned row)
unsigned col;
for (col = 0; col < EVTCHN_PER_ROW; col++)
evtchn_to_irq[row][col] = -1;
WRITE_ONCE(evtchn_to_irq[row][col], -1);
}
static void clear_evtchn_to_irq_all(void)
@ -142,7 +160,7 @@ static int set_evtchn_to_irq(evtchn_port_t evtchn, unsigned int irq)
clear_evtchn_to_irq_row(row);
}
evtchn_to_irq[row][col] = irq;
WRITE_ONCE(evtchn_to_irq[row][col], irq);
return 0;
}
@ -152,7 +170,7 @@ int get_evtchn_to_irq(evtchn_port_t evtchn)
return -1;
if (evtchn_to_irq[EVTCHN_ROW(evtchn)] == NULL)
return -1;
return evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)];
return READ_ONCE(evtchn_to_irq[EVTCHN_ROW(evtchn)][EVTCHN_COL(evtchn)]);
}
/* Get info for IRQ */
@ -261,10 +279,14 @@ static void xen_irq_info_cleanup(struct irq_info *info)
*/
evtchn_port_t evtchn_from_irq(unsigned irq)
{
if (WARN(irq >= nr_irqs, "Invalid irq %d!\n", irq))
const struct irq_info *info = NULL;
if (likely(irq < nr_irqs))
info = info_for_irq(irq);
if (!info)
return 0;
return info_for_irq(irq)->evtchn;
return info->evtchn;
}
unsigned int irq_from_evtchn(evtchn_port_t evtchn)
@ -440,16 +462,21 @@ static int __must_check xen_allocate_irq_gsi(unsigned gsi)
static void xen_free_irq(unsigned irq)
{
struct irq_info *info = info_for_irq(irq);
unsigned long flags;
if (WARN_ON(!info))
return;
write_lock_irqsave(&evtchn_rwlock, flags);
list_del(&info->list);
set_info_for_irq(irq, NULL);
WARN_ON(info->refcnt > 0);
write_unlock_irqrestore(&evtchn_rwlock, flags);
kfree(info);
/* Legacy IRQ descriptors are managed by the arch. */
@ -1233,6 +1260,8 @@ static void __xen_evtchn_do_upcall(void)
struct vcpu_info *vcpu_info = __this_cpu_read(xen_vcpu);
int cpu = smp_processor_id();
read_lock(&evtchn_rwlock);
do {
vcpu_info->evtchn_upcall_pending = 0;
@ -1243,6 +1272,8 @@ static void __xen_evtchn_do_upcall(void)
virt_rmb(); /* Hypervisor can set upcall pending. */
} while (vcpu_info->evtchn_upcall_pending);
read_unlock(&evtchn_rwlock);
}
void xen_evtchn_do_upcall(struct pt_regs *regs)