/*
 * Copyright 2012 Michael Ellerman, IBM Corporation.
 * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License, version 2, as
 * published by the Free Software Foundation.
 */
|
|
|
|
|
|
|
|
#ifndef _KVM_PPC_BOOK3S_XICS_H
|
|
|
|
#define _KVM_PPC_BOOK3S_XICS_H
|
|
|
|
|
|
|
|
/*
 * We use a two-level tree to store interrupt source information.
 * There are up to 1024 ICS nodes, each of which can represent
 * 1024 sources.
 *
 * An interrupt number is split into an ICS index (the bits above
 * KVMPPC_XICS_ICS_SHIFT) and a source index within that ICS (the bits
 * selected by KVMPPC_XICS_SRC_MASK).
 */
#define KVMPPC_XICS_MAX_ICS_ID	1023
#define KVMPPC_XICS_ICS_SHIFT	10
#define KVMPPC_XICS_IRQ_PER_ICS	(1 << KVMPPC_XICS_ICS_SHIFT)
#define KVMPPC_XICS_SRC_MASK	(KVMPPC_XICS_IRQ_PER_ICS - 1)
|
|
|
|
|
|
|
|
/*
 * Interrupt source numbers below this are reserved, for example
 * 0 is "no interrupt", and 2 is used for IPIs.
 */
#define KVMPPC_XICS_FIRST_IRQ	16

/* Total number of interrupt numbers: ICS node count times sources per ICS */
#define KVMPPC_XICS_NR_IRQS	((KVMPPC_XICS_MAX_ICS_ID + 1) * \
				 KVMPPC_XICS_IRQ_PER_ICS)
|
|
|
|
|
|
|
|
/* Priority value to use for disabling an interrupt */
#define MASKED	0xff
|
|
|
|
|
|
|
|
/* State for one irq source */
|
|
|
|
struct ics_irq_state {
|
|
|
|
u32 number;
|
|
|
|
u32 server;
|
|
|
|
u8 priority;
|
2013-04-18 04:32:04 +08:00
|
|
|
u8 saved_priority;
|
2013-04-18 04:30:26 +08:00
|
|
|
u8 resend;
|
|
|
|
u8 masked_pending;
|
2016-05-04 19:07:52 +08:00
|
|
|
u8 lsi; /* level-sensitive interrupt */
|
2013-04-18 04:30:26 +08:00
|
|
|
u8 asserted; /* Only for LSI */
|
|
|
|
u8 exists;
|
KVM: PPC: Book3S HV: Set server for passed-through interrupts
When a guest has a PCI pass-through device with an interrupt, it
will direct the interrupt to a particular guest VCPU. In fact the
physical interrupt might arrive on any CPU, and then get
delivered to the target VCPU in the emulated XICS (guest interrupt
controller), and eventually delivered to the target VCPU.
Now that we have code to handle device interrupts in real mode
without exiting to the host kernel, there is an advantage to having
the device interrupt arrive on the same sub(core) as the target
VCPU is running on. In this situation, the interrupt can be
delivered to the target VCPU without any exit to the host kernel
(using a hypervisor doorbell interrupt between threads if
necessary).
This patch aims to get passed-through device interrupts arriving
on the correct core by setting the interrupt server in the real
hardware XICS for the interrupt to the first thread in the (sub)core
where its target VCPU is running. We do this in the real-mode H_EOI
code because the H_EOI handler already needs to look at the
emulated ICS state for the interrupt (whereas the H_XIRR handler
doesn't), and we know we are running in the target VCPU context
at that point.
We set the server CPU in hardware using an OPAL call, regardless of
what the IRQ affinity mask for the interrupt says, and without
updating the affinity mask. This amounts to saying that when an
interrupt is passed through to a guest, as a matter of policy we
allow the guest's affinity for the interrupt to override the host's.
This is inspired by an earlier patch from Suresh Warrier, although
none of this code came from that earlier patch.
Signed-off-by: Paul Mackerras <paulus@ozlabs.org>
2016-08-19 13:35:56 +08:00
|
|
|
int intr_cpu;
|
|
|
|
u32 host_irq;
|
2013-04-18 04:30:26 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
/* Atomic ICP state, updated with a single compare & swap */
|
|
|
|
union kvmppc_icp_state {
|
|
|
|
unsigned long raw;
|
|
|
|
struct {
|
|
|
|
u8 out_ee:1;
|
|
|
|
u8 need_resend:1;
|
|
|
|
u8 cppr;
|
|
|
|
u8 mfrr;
|
|
|
|
u8 pending_pri;
|
|
|
|
u32 xisr;
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
/* One bit per ICS: number of longs needed to hold KVMPPC_XICS_MAX_ICS_ID + 1 bits */
#define ICP_RESEND_MAP_SIZE	(KVMPPC_XICS_MAX_ICS_ID / BITS_PER_LONG + 1)
|
|
|
|
|
|
|
|
struct kvmppc_icp {
|
|
|
|
struct kvm_vcpu *vcpu;
|
|
|
|
unsigned long server_num;
|
|
|
|
union kvmppc_icp_state state;
|
|
|
|
unsigned long resend_map[ICP_RESEND_MAP_SIZE];
|
2013-04-18 04:31:15 +08:00
|
|
|
|
|
|
|
/* Real mode might find something too hard, here's the action
|
|
|
|
* it might request from virtual mode
|
|
|
|
*/
|
|
|
|
#define XICS_RM_KICK_VCPU 0x1
|
|
|
|
#define XICS_RM_CHECK_RESEND 0x2
|
2014-06-30 18:51:14 +08:00
|
|
|
#define XICS_RM_NOTIFY_EOI 0x8
|
2013-04-18 04:31:15 +08:00
|
|
|
u32 rm_action;
|
|
|
|
struct kvm_vcpu *rm_kick_target;
|
KVM: PPC: Book3S HV: Fix inaccuracies in ICP emulation for H_IPI
This fixes some inaccuracies in the state machine for the virtualized
ICP when implementing the H_IPI hcall (Set_MFFR and related states):
1. The old code wipes out any pending interrupts when the new MFRR is
more favored than the CPPR but less favored than a pending
interrupt (by always modifying xisr and the pending_pri). This can
cause us to lose a pending external interrupt.
The correct code here is to only modify the pending_pri and xisr in
the ICP if the MFRR is equal to or more favored than the current
pending pri (since in this case, it is guaranteed that that there
cannot be a pending external interrupt). The code changes are
required in both kvmppc_rm_h_ipi and kvmppc_h_ipi.
2. Again, in both kvmppc_rm_h_ipi and kvmppc_h_ipi, there is a check
for whether MFRR is being made less favored AND further if new MFFR
is also less favored than the current CPPR, we check for any
resends pending in the ICP. These checks look like they are
designed to cover the case where if the MFRR is being made less
favored, we opportunistically trigger a resend of any interrupts
that had been previously rejected. Although, this is not a state
described by PAPR, this is an action we actually need to do
especially if the CPPR is already at 0xFF. Because in this case,
the resend bit will stay on until another ICP state change which
may be a long time coming and the interrupt stays pending until
then. The current code which checks for MFRR < CPPR is broken when
CPPR is 0xFF since it will not get triggered in that case.
Ideally, we would want to do a resend only if
prio(pending_interrupt) < mfrr && prio(pending_interrupt) < cppr
where pending interrupt is the one that was rejected. But we don't
have the priority of the pending interrupt state saved, so we
simply trigger a resend whenever the MFRR is made less favored.
3. In kvmppc_rm_h_ipi, where we save state to pass resends to the
virtual mode, we also need to save the ICP whose need_resend we
reset since this does not need to be my ICP (vcpu->arch.icp) as is
incorrectly assumed by the current code. A new field rm_resend_icp
is added to the kvmppc_icp structure for this purpose.
Signed-off-by: Suresh Warrier <warrier@linux.vnet.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
2014-11-03 12:51:59 +08:00
|
|
|
struct kvmppc_icp *rm_resend_icp;
|
2013-04-18 04:31:15 +08:00
|
|
|
u32 rm_reject;
|
2014-06-30 18:51:14 +08:00
|
|
|
u32 rm_eoied_irq;
|
2013-04-18 04:31:15 +08:00
|
|
|
|
2015-03-20 17:39:45 +08:00
|
|
|
/* Counters for each reason we exited real mode */
|
|
|
|
unsigned long n_rm_kick_vcpu;
|
|
|
|
unsigned long n_rm_check_resend;
|
|
|
|
unsigned long n_rm_notify_eoi;
|
2015-03-20 17:39:48 +08:00
|
|
|
/* Counters for handling ICP processing in real mode */
|
|
|
|
unsigned long n_check_resend;
|
|
|
|
unsigned long n_reject;
|
2015-03-20 17:39:45 +08:00
|
|
|
|
2013-04-18 04:31:15 +08:00
|
|
|
/* Debug stuff for real mode */
|
|
|
|
union kvmppc_icp_state rm_dbgstate;
|
|
|
|
struct kvm_vcpu *rm_dbgtgt;
|
2013-04-18 04:30:26 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
struct kvmppc_ics {
|
2015-03-20 17:39:46 +08:00
|
|
|
arch_spinlock_t lock;
|
2013-04-18 04:30:26 +08:00
|
|
|
u16 icsid;
|
|
|
|
struct ics_irq_state irq_state[KVMPPC_XICS_IRQ_PER_ICS];
|
|
|
|
};
|
|
|
|
|
|
|
|
struct kvmppc_xics {
|
|
|
|
struct kvm *kvm;
|
2013-04-27 08:28:37 +08:00
|
|
|
struct kvm_device *dev;
|
2013-04-18 04:30:26 +08:00
|
|
|
struct dentry *dentry;
|
|
|
|
u32 max_icsid;
|
2013-04-18 04:31:15 +08:00
|
|
|
bool real_mode;
|
|
|
|
bool real_mode_dbg;
|
2015-03-20 17:39:48 +08:00
|
|
|
u32 err_noics;
|
|
|
|
u32 err_noicp;
|
2013-04-18 04:30:26 +08:00
|
|
|
struct kvmppc_ics *ics[KVMPPC_XICS_MAX_ICS_ID + 1];
|
|
|
|
};
|
|
|
|
|
|
|
|
static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
|
|
|
|
u32 nr)
|
|
|
|
{
|
|
|
|
struct kvm_vcpu *vcpu = NULL;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
kvm_for_each_vcpu(i, vcpu, kvm) {
|
|
|
|
if (vcpu->arch.icp && nr == vcpu->arch.icp->server_num)
|
|
|
|
return vcpu->arch.icp;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
|
|
|
|
u32 irq, u16 *source)
|
|
|
|
{
|
|
|
|
u32 icsid = irq >> KVMPPC_XICS_ICS_SHIFT;
|
|
|
|
u16 src = irq & KVMPPC_XICS_SRC_MASK;
|
|
|
|
struct kvmppc_ics *ics;
|
|
|
|
|
|
|
|
if (source)
|
|
|
|
*source = src;
|
|
|
|
if (icsid > KVMPPC_XICS_MAX_ICS_ID)
|
|
|
|
return NULL;
|
|
|
|
ics = xics->ics[icsid];
|
|
|
|
if (!ics)
|
|
|
|
return NULL;
|
|
|
|
return ics;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#endif /* _KVM_PPC_BOOK3S_XICS_H */
|