linux/arch/s390/include/asm/kvm_host.h

713 lines
19 KiB
C
Raw Normal View History

/*
* definition for kernel virtual machines on s390
*
* Copyright IBM Corp. 2008, 2009
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License (version 2 only)
* as published by the Free Software Foundation.
*
* Author(s): Carsten Otte <cotte@de.ibm.com>
*/
#ifndef ASM_KVM_HOST_H
#define ASM_KVM_HOST_H
#include <linux/types.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kvm.h>
KVM: s390: protect VCPU cpu timer with a seqcount For now, only the owning VCPU thread (that has loaded the VCPU) can get a consistent cpu timer value when calculating the delta. However, other threads might also be interested in a more recent, consistent value. Of special interest will be the timer callback of a VCPU that executes without having the VCPU loaded and could run in parallel with the VCPU thread. The cpu timer has a nice property: it is only updated by the owning VCPU thread. And speaking about accounting, a consistent value can only be calculated by looking at cputm_start and the cpu timer itself in one shot, otherwise the result might be wrong. As we only have one writing thread at a time (owning VCPU thread), we can use a seqcount instead of a seqlock and retry if the VCPU refreshed its cpu timer. This avoids any heavy locking and only introduces a counter update/check plus a handful of smp_wmb(). The owning VCPU thread should never have to retry on reads, and also for other threads this might be a very rare scenario. Please note that we have to use the raw_* variants for locking the seqcount as lockdep will produce false warnings otherwise. The rq->lock held during vcpu_load/put is also acquired from hardirq context. Lockdep cannot know that we avoid potential deadlocks by disabling preemption and thereby disable concurrent write locking attempts (via vcpu_put/load). Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2016-02-18 04:53:33 +08:00
#include <linux/seqlock.h>
#include <asm/debug.h>
#include <asm/cpu.h>
#include <asm/fpu/api.h>
#include <asm/isc.h>
#define KVM_S390_BSCA_CPU_SLOTS 64
#define KVM_S390_ESCA_CPU_SLOTS 248
#define KVM_MAX_VCPUS KVM_S390_ESCA_CPU_SLOTS
#define KVM_USER_MEM_SLOTS 32
/*
* These seem to be used for allocating ->chip in the routing table,
* which we don't use. 4096 is an out-of-thin-air value. If we need
* to look at ->chip later on, we'll need to revisit this.
*/
#define KVM_NR_IRQCHIPS 1
#define KVM_IRQCHIP_NUM_PINS 4096
#define KVM_HALT_POLL_NS_DEFAULT 80000
/* s390-specific vcpu->requests bit members */
#define KVM_REQ_ENABLE_IBS 8
#define KVM_REQ_DISABLE_IBS 9
#define SIGP_CTRL_C 0x80
#define SIGP_CTRL_SCN_MASK 0x3f
union bsca_sigp_ctrl {
__u8 value;
struct {
__u8 c : 1;
__u8 r : 1;
__u8 scn : 6;
};
} __packed;
union esca_sigp_ctrl {
__u16 value;
struct {
__u8 c : 1;
__u8 reserved: 7;
__u8 scn;
};
} __packed;
struct esca_entry {
union esca_sigp_ctrl sigp_ctrl;
__u16 reserved1[3];
__u64 sda;
__u64 reserved2[6];
} __packed;
struct bsca_entry {
__u8 reserved0;
union bsca_sigp_ctrl sigp_ctrl;
__u16 reserved[3];
__u64 sda;
__u64 reserved2[2];
} __attribute__((packed));
union ipte_control {
unsigned long val;
struct {
unsigned long k : 1;
unsigned long kh : 31;
unsigned long kg : 32;
};
};
struct bsca_block {
union ipte_control ipte_control;
__u64 reserved[5];
__u64 mcn;
__u64 reserved2;
struct bsca_entry cpu[KVM_S390_BSCA_CPU_SLOTS];
} __attribute__((packed));
struct esca_block {
union ipte_control ipte_control;
__u64 reserved1[7];
__u64 mcn[4];
__u64 reserved2[20];
struct esca_entry cpu[KVM_S390_ESCA_CPU_SLOTS];
} __packed;
#define CPUSTAT_STOPPED 0x80000000
#define CPUSTAT_WAIT 0x10000000
#define CPUSTAT_ECALL_PEND 0x08000000
#define CPUSTAT_STOP_INT 0x04000000
#define CPUSTAT_IO_INT 0x02000000
#define CPUSTAT_EXT_INT 0x01000000
#define CPUSTAT_RUNNING 0x00800000
#define CPUSTAT_RETAINED 0x00400000
#define CPUSTAT_TIMING_SUB 0x00020000
#define CPUSTAT_SIE_SUB 0x00010000
#define CPUSTAT_RRF 0x00008000
#define CPUSTAT_SLSV 0x00004000
#define CPUSTAT_SLSR 0x00002000
#define CPUSTAT_ZARCH 0x00000800
#define CPUSTAT_MCDS 0x00000100
#define CPUSTAT_SM 0x00000080
#define CPUSTAT_IBS 0x00000040
#define CPUSTAT_GED2 0x00000010
#define CPUSTAT_G 0x00000008
#define CPUSTAT_GED 0x00000004
#define CPUSTAT_J 0x00000002
#define CPUSTAT_P 0x00000001
struct kvm_s390_sie_block {
atomic_t cpuflags; /* 0x0000 */
__u32 : 1; /* 0x0004 */
__u32 prefix : 18;
__u32 : 1;
__u32 ibc : 12;
__u8 reserved08[4]; /* 0x0008 */
#define PROG_IN_SIE (1<<0)
__u32 prog0c; /* 0x000c */
__u8 reserved10[16]; /* 0x0010 */
#define PROG_BLOCK_SIE (1<<0)
#define PROG_REQUEST (1<<1)
atomic_t prog20; /* 0x0020 */
__u8 reserved24[4]; /* 0x0024 */
__u64 cputm; /* 0x0028 */
__u64 ckc; /* 0x0030 */
__u64 epoch; /* 0x0038 */
__u8 reserved40[4]; /* 0x0040 */
KVM: s390: interrupt subsystem, cpu timer, waitpsw This patch contains the s390 interrupt subsystem (similar to in kernel apic) including timer interrupts (similar to in-kernel-pit) and enabled wait (similar to in kernel hlt). In order to achieve that, this patch also introduces intercept handling for instruction intercepts, and it implements load control instructions. This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both the vm file descriptors and the vcpu file descriptors. In case this ioctl is issued against a vm file descriptor, the interrupt is considered floating. Floating interrupts may be delivered to any virtual cpu in the configuration. The following interrupts are supported: SIGP STOP - interprocessor signal that stops a remote cpu SIGP SET PREFIX - interprocessor signal that sets the prefix register of a (stopped) remote cpu INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed and for smp_call_function() in the guest. PROGRAM INT - exception during program execution such as page fault, illegal instruction and friends RESTART - interprocessor signal that starts a stopped cpu INT VIRTIO - floating interrupt for virtio signalisation INT SERVICE - floating interrupt for signalisations from the system service processor struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting an interrupt, also carrys parameter data for interrupts along with the interrupt type. Interrupts on s390 usually have a state that represents the current operation, or identifies which device has caused the interruption on s390. kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a disabled wait (that is, disabled for interrupts), we exit to userspace. In case of an enabled wait we set up a timer that equals the cpu clock comparator value and sleep on a wait queue. [christian: change virtio interrupt to 0x2603] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-26 01:47:26 +08:00
#define LCTL_CR0 0x8000
#define LCTL_CR6 0x0200
#define LCTL_CR9 0x0040
#define LCTL_CR10 0x0020
#define LCTL_CR11 0x0010
#define LCTL_CR14 0x0002
__u16 lctl; /* 0x0044 */
__s16 icpua; /* 0x0046 */
#define ICTL_OPEREXC 0x80000000
#define ICTL_PINT 0x20000000
#define ICTL_LPSW 0x00400000
#define ICTL_STCTL 0x00040000
#define ICTL_ISKE 0x00004000
#define ICTL_SSKE 0x00002000
#define ICTL_RRBE 0x00001000
#define ICTL_TPROT 0x00000200
__u32 ictl; /* 0x0048 */
__u32 eca; /* 0x004c */
#define ICPT_INST 0x04
#define ICPT_PROGI 0x08
#define ICPT_INSTPROGI 0x0C
#define ICPT_OPEREXC 0x2C
#define ICPT_PARTEXEC 0x38
#define ICPT_IOINST 0x40
__u8 icptcode; /* 0x0050 */
__u8 icptstatus; /* 0x0051 */
__u16 ihcpu; /* 0x0052 */
__u8 reserved54[2]; /* 0x0054 */
__u16 ipa; /* 0x0056 */
__u32 ipb; /* 0x0058 */
__u32 scaoh; /* 0x005c */
__u8 reserved60; /* 0x0060 */
__u8 ecb; /* 0x0061 */
__u8 ecb2; /* 0x0062 */
#define ECB3_AES 0x04
#define ECB3_DEA 0x08
__u8 ecb3; /* 0x0063 */
__u32 scaol; /* 0x0064 */
__u8 reserved68[4]; /* 0x0068 */
__u32 todpr; /* 0x006c */
__u8 reserved70[16]; /* 0x0070 */
__u64 mso; /* 0x0080 */
__u64 msl; /* 0x0088 */
psw_t gpsw; /* 0x0090 */
__u64 gg14; /* 0x00a0 */
__u64 gg15; /* 0x00a8 */
__u8 reservedb0[20]; /* 0x00b0 */
__u16 extcpuaddr; /* 0x00c4 */
__u16 eic; /* 0x00c6 */
__u32 reservedc8; /* 0x00c8 */
__u16 pgmilc; /* 0x00cc */
__u16 iprcc; /* 0x00ce */
__u32 dxc; /* 0x00d0 */
__u16 mcn; /* 0x00d4 */
__u8 perc; /* 0x00d6 */
__u8 peratmid; /* 0x00d7 */
__u64 peraddr; /* 0x00d8 */
__u8 eai; /* 0x00e0 */
__u8 peraid; /* 0x00e1 */
__u8 oai; /* 0x00e2 */
__u8 armid; /* 0x00e3 */
__u8 reservede4[4]; /* 0x00e4 */
__u64 tecmc; /* 0x00e8 */
__u8 reservedf0[12]; /* 0x00f0 */
#define CRYCB_FORMAT1 0x00000001
#define CRYCB_FORMAT2 0x00000003
__u32 crycbd; /* 0x00fc */
__u64 gcr[16]; /* 0x0100 */
__u64 gbea; /* 0x0180 */
__u8 reserved188[24]; /* 0x0188 */
__u32 fac; /* 0x01a0 */
__u8 reserved1a4[20]; /* 0x01a4 */
__u64 cbrlo; /* 0x01b8 */
__u8 reserved1c0[8]; /* 0x01c0 */
__u32 ecd; /* 0x01c8 */
__u8 reserved1cc[18]; /* 0x01cc */
__u64 pp; /* 0x01de */
__u8 reserved1e6[2]; /* 0x01e6 */
__u64 itdba; /* 0x01e8 */
__u64 riccbd; /* 0x01f0 */
__u8 reserved1f8[8]; /* 0x01f8 */
} __attribute__((packed));
struct kvm_s390_itdb {
__u8 data[256];
} __packed;
struct sie_page {
struct kvm_s390_sie_block sie_block;
__u8 reserved200[1024]; /* 0x0200 */
struct kvm_s390_itdb itdb; /* 0x0600 */
__u8 reserved700[2304]; /* 0x0700 */
} __packed;
struct kvm_vcpu_stat {
u32 exit_userspace;
u32 exit_null;
u32 exit_external_request;
u32 exit_external_interrupt;
u32 exit_stop_request;
u32 exit_validity;
KVM: s390: interrupt subsystem, cpu timer, waitpsw This patch contains the s390 interrupt subsystem (similar to in kernel apic) including timer interrupts (similar to in-kernel-pit) and enabled wait (similar to in kernel hlt). In order to achieve that, this patch also introduces intercept handling for instruction intercepts, and it implements load control instructions. This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both the vm file descriptors and the vcpu file descriptors. In case this ioctl is issued against a vm file descriptor, the interrupt is considered floating. Floating interrupts may be delivered to any virtual cpu in the configuration. The following interrupts are supported: SIGP STOP - interprocessor signal that stops a remote cpu SIGP SET PREFIX - interprocessor signal that sets the prefix register of a (stopped) remote cpu INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed and for smp_call_function() in the guest. PROGRAM INT - exception during program execution such as page fault, illegal instruction and friends RESTART - interprocessor signal that starts a stopped cpu INT VIRTIO - floating interrupt for virtio signalisation INT SERVICE - floating interrupt for signalisations from the system service processor struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting an interrupt, also carrys parameter data for interrupts along with the interrupt type. Interrupts on s390 usually have a state that represents the current operation, or identifies which device has caused the interruption on s390. kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a disabled wait (that is, disabled for interrupts), we exit to userspace. In case of an enabled wait we set up a timer that equals the cpu clock comparator value and sleep on a wait queue. [christian: change virtio interrupt to 0x2603] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-26 01:47:26 +08:00
u32 exit_instruction;
kvm: add halt_poll_ns module parameter This patch introduces a new module parameter for the KVM module; when it is present, KVM attempts a bit of polling on every HLT before scheduling itself out via kvm_vcpu_block. This parameter helps a lot for latency-bound workloads---in particular I tested it with O_DSYNC writes with a battery-backed disk in the host. In this case, writes are fast (because the data doesn't have to go all the way to the platters) but they cannot be merged by either the host or the guest. KVM's performance here is usually around 30% of bare metal, or 50% if you use cache=directsync or cache=writethrough (these parameters avoid that the guest sends pointless flush requests, and at the same time they are not slow because of the battery-backed cache). The bad performance happens because on every halt the host CPU decides to halt itself too. When the interrupt comes, the vCPU thread is then migrated to a new physical CPU, and in general the latency is horrible because the vCPU thread has to be scheduled back in. With this patch performance reaches 60-65% of bare metal and, more important, 99% of what you get if you use idle=poll in the guest. This means that the tunable gets rid of this particular bottleneck, and more work can be done to improve performance in the kernel or QEMU. Of course there is some price to pay; every time an otherwise idle vCPUs is interrupted by an interrupt, it will poll unnecessarily and thus impose a little load on the host. The above results were obtained with a mostly random value of the parameter (500000), and the load was around 1.5-2.5% CPU usage on one of the host's core for each idle guest vCPU. The patch also adds a new stat, /sys/kernel/debug/kvm/halt_successful_poll, that can be used to tune the parameter. It counts how many HLT instructions received an interrupt during the polling period; each successful poll avoids that Linux schedules the VCPU thread out and back in, and may also avoid a likely trip to C1 and back for the physical CPU. While the VM is idle, a Linux 4 VCPU VM halts around 10 times per second. Of these halts, almost all are failed polls. During the benchmark, instead, basically all halts end within the polling period, except a more or less constant stream of 50 per second coming from vCPUs that are not running the benchmark. The wasted time is thus very low. Things may be slightly different for Windows VMs, which have a ~10 ms timer tick. The effect is also visible on Marcelo's recently-introduced latency test for the TSC deadline timer. Though of course a non-RT kernel has awful latency bounds, the latency of the timer is around 8000-10000 clock cycles compared to 20000-120000 without setting halt_poll_ns. For the TSC deadline timer, thus, the effect is both a smaller average latency and a smaller variance. Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2015-02-05 01:20:58 +08:00
u32 halt_successful_poll;
u32 halt_attempted_poll;
KVM: halt_polling: provide a way to qualify wakeups during poll Some wakeups should not be considered a sucessful poll. For example on s390 I/O interrupts are usually floating, which means that _ALL_ CPUs would be considered runnable - letting all vCPUs poll all the time for transactional like workload, even if one vCPU would be enough. This can result in huge CPU usage for large guests. This patch lets architectures provide a way to qualify wakeups if they should be considered a good/bad wakeups in regard to polls. For s390 the implementation will fence of halt polling for anything but known good, single vCPU events. The s390 implementation for floating interrupts does a wakeup for one vCPU, but the interrupt will be delivered by whatever CPU checks first for a pending interrupt. We prefer the woken up CPU by marking the poll of this CPU as "good" poll. This code will also mark several other wakeup reasons like IPI or expired timers as "good". This will of course also mark some events as not sucessful. As KVM on z runs always as a 2nd level hypervisor, we prefer to not poll, unless we are really sure, though. This patch successfully limits the CPU usage for cases like uperf 1byte transactional ping pong workload or wakeup heavy workload like OLTP while still providing a proper speedup. This also introduced a new vcpu stat "halt_poll_no_tuning" that marks wakeups that are considered not good for polling. Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Acked-by: Radim Krčmář <rkrcmar@redhat.com> (for an earlier version) Cc: David Matlack <dmatlack@google.com> Cc: Wanpeng Li <kernellwp@gmail.com> [Rename config symbol. - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-05-13 18:16:35 +08:00
u32 halt_poll_invalid;
u32 halt_wakeup;
KVM: s390: interrupt subsystem, cpu timer, waitpsw This patch contains the s390 interrupt subsystem (similar to in kernel apic) including timer interrupts (similar to in-kernel-pit) and enabled wait (similar to in kernel hlt). In order to achieve that, this patch also introduces intercept handling for instruction intercepts, and it implements load control instructions. This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both the vm file descriptors and the vcpu file descriptors. In case this ioctl is issued against a vm file descriptor, the interrupt is considered floating. Floating interrupts may be delivered to any virtual cpu in the configuration. The following interrupts are supported: SIGP STOP - interprocessor signal that stops a remote cpu SIGP SET PREFIX - interprocessor signal that sets the prefix register of a (stopped) remote cpu INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed and for smp_call_function() in the guest. PROGRAM INT - exception during program execution such as page fault, illegal instruction and friends RESTART - interprocessor signal that starts a stopped cpu INT VIRTIO - floating interrupt for virtio signalisation INT SERVICE - floating interrupt for signalisations from the system service processor struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting an interrupt, also carrys parameter data for interrupts along with the interrupt type. Interrupts on s390 usually have a state that represents the current operation, or identifies which device has caused the interruption on s390. kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a disabled wait (that is, disabled for interrupts), we exit to userspace. In case of an enabled wait we set up a timer that equals the cpu clock comparator value and sleep on a wait queue. [christian: change virtio interrupt to 0x2603] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-26 01:47:26 +08:00
u32 instruction_lctl;
u32 instruction_lctlg;
u32 instruction_stctl;
u32 instruction_stctg;
KVM: s390: interrupt subsystem, cpu timer, waitpsw This patch contains the s390 interrupt subsystem (similar to in kernel apic) including timer interrupts (similar to in-kernel-pit) and enabled wait (similar to in kernel hlt). In order to achieve that, this patch also introduces intercept handling for instruction intercepts, and it implements load control instructions. This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both the vm file descriptors and the vcpu file descriptors. In case this ioctl is issued against a vm file descriptor, the interrupt is considered floating. Floating interrupts may be delivered to any virtual cpu in the configuration. The following interrupts are supported: SIGP STOP - interprocessor signal that stops a remote cpu SIGP SET PREFIX - interprocessor signal that sets the prefix register of a (stopped) remote cpu INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed and for smp_call_function() in the guest. PROGRAM INT - exception during program execution such as page fault, illegal instruction and friends RESTART - interprocessor signal that starts a stopped cpu INT VIRTIO - floating interrupt for virtio signalisation INT SERVICE - floating interrupt for signalisations from the system service processor struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting an interrupt, also carrys parameter data for interrupts along with the interrupt type. Interrupts on s390 usually have a state that represents the current operation, or identifies which device has caused the interruption on s390. kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a disabled wait (that is, disabled for interrupts), we exit to userspace. In case of an enabled wait we set up a timer that equals the cpu clock comparator value and sleep on a wait queue. [christian: change virtio interrupt to 0x2603] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-26 01:47:26 +08:00
u32 exit_program_interruption;
u32 exit_instr_and_program;
u32 exit_operation_exception;
u32 deliver_external_call;
KVM: s390: interrupt subsystem, cpu timer, waitpsw This patch contains the s390 interrupt subsystem (similar to in kernel apic) including timer interrupts (similar to in-kernel-pit) and enabled wait (similar to in kernel hlt). In order to achieve that, this patch also introduces intercept handling for instruction intercepts, and it implements load control instructions. This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both the vm file descriptors and the vcpu file descriptors. In case this ioctl is issued against a vm file descriptor, the interrupt is considered floating. Floating interrupts may be delivered to any virtual cpu in the configuration. The following interrupts are supported: SIGP STOP - interprocessor signal that stops a remote cpu SIGP SET PREFIX - interprocessor signal that sets the prefix register of a (stopped) remote cpu INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed and for smp_call_function() in the guest. PROGRAM INT - exception during program execution such as page fault, illegal instruction and friends RESTART - interprocessor signal that starts a stopped cpu INT VIRTIO - floating interrupt for virtio signalisation INT SERVICE - floating interrupt for signalisations from the system service processor struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting an interrupt, also carrys parameter data for interrupts along with the interrupt type. Interrupts on s390 usually have a state that represents the current operation, or identifies which device has caused the interruption on s390. kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a disabled wait (that is, disabled for interrupts), we exit to userspace. In case of an enabled wait we set up a timer that equals the cpu clock comparator value and sleep on a wait queue. [christian: change virtio interrupt to 0x2603] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-26 01:47:26 +08:00
u32 deliver_emergency_signal;
u32 deliver_service_signal;
u32 deliver_virtio_interrupt;
u32 deliver_stop_signal;
u32 deliver_prefix_signal;
u32 deliver_restart_signal;
u32 deliver_program_int;
u32 deliver_io_int;
KVM: s390: interrupt subsystem, cpu timer, waitpsw This patch contains the s390 interrupt subsystem (similar to in kernel apic) including timer interrupts (similar to in-kernel-pit) and enabled wait (similar to in kernel hlt). In order to achieve that, this patch also introduces intercept handling for instruction intercepts, and it implements load control instructions. This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both the vm file descriptors and the vcpu file descriptors. In case this ioctl is issued against a vm file descriptor, the interrupt is considered floating. Floating interrupts may be delivered to any virtual cpu in the configuration. The following interrupts are supported: SIGP STOP - interprocessor signal that stops a remote cpu SIGP SET PREFIX - interprocessor signal that sets the prefix register of a (stopped) remote cpu INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed and for smp_call_function() in the guest. PROGRAM INT - exception during program execution such as page fault, illegal instruction and friends RESTART - interprocessor signal that starts a stopped cpu INT VIRTIO - floating interrupt for virtio signalisation INT SERVICE - floating interrupt for signalisations from the system service processor struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting an interrupt, also carrys parameter data for interrupts along with the interrupt type. Interrupts on s390 usually have a state that represents the current operation, or identifies which device has caused the interruption on s390. kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a disabled wait (that is, disabled for interrupts), we exit to userspace. In case of an enabled wait we set up a timer that equals the cpu clock comparator value and sleep on a wait queue. [christian: change virtio interrupt to 0x2603] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-26 01:47:26 +08:00
u32 exit_wait_state;
u32 instruction_pfmf;
u32 instruction_stidp;
u32 instruction_spx;
u32 instruction_stpx;
u32 instruction_stap;
u32 instruction_storage_key;
u32 instruction_ipte_interlock;
u32 instruction_stsch;
u32 instruction_chsc;
u32 instruction_stsi;
u32 instruction_stfl;
u32 instruction_tprot;
u32 instruction_essa;
u32 instruction_sthyi;
u32 instruction_sigp_sense;
u32 instruction_sigp_sense_running;
u32 instruction_sigp_external_call;
u32 instruction_sigp_emergency;
u32 instruction_sigp_cond_emergency;
u32 instruction_sigp_start;
u32 instruction_sigp_stop;
u32 instruction_sigp_stop_store_status;
u32 instruction_sigp_store_status;
u32 instruction_sigp_store_adtl_status;
u32 instruction_sigp_arch;
u32 instruction_sigp_prefix;
u32 instruction_sigp_restart;
u32 instruction_sigp_init_cpu_reset;
u32 instruction_sigp_cpu_reset;
u32 instruction_sigp_unknown;
u32 diagnose_10;
u32 diagnose_44;
u32 diagnose_9c;
u32 diagnose_258;
u32 diagnose_308;
u32 diagnose_500;
};
#define PGM_OPERATION 0x01
#define PGM_PRIVILEGED_OP 0x02
#define PGM_EXECUTE 0x03
#define PGM_PROTECTION 0x04
#define PGM_ADDRESSING 0x05
#define PGM_SPECIFICATION 0x06
#define PGM_DATA 0x07
#define PGM_FIXED_POINT_OVERFLOW 0x08
#define PGM_FIXED_POINT_DIVIDE 0x09
#define PGM_DECIMAL_OVERFLOW 0x0a
#define PGM_DECIMAL_DIVIDE 0x0b
#define PGM_HFP_EXPONENT_OVERFLOW 0x0c
#define PGM_HFP_EXPONENT_UNDERFLOW 0x0d
#define PGM_HFP_SIGNIFICANCE 0x0e
#define PGM_HFP_DIVIDE 0x0f
#define PGM_SEGMENT_TRANSLATION 0x10
#define PGM_PAGE_TRANSLATION 0x11
#define PGM_TRANSLATION_SPEC 0x12
#define PGM_SPECIAL_OPERATION 0x13
#define PGM_OPERAND 0x15
#define PGM_TRACE_TABEL 0x16
#define PGM_VECTOR_PROCESSING 0x1b
#define PGM_SPACE_SWITCH 0x1c
#define PGM_HFP_SQUARE_ROOT 0x1d
#define PGM_PC_TRANSLATION_SPEC 0x1f
#define PGM_AFX_TRANSLATION 0x20
#define PGM_ASX_TRANSLATION 0x21
#define PGM_LX_TRANSLATION 0x22
#define PGM_EX_TRANSLATION 0x23
#define PGM_PRIMARY_AUTHORITY 0x24
#define PGM_SECONDARY_AUTHORITY 0x25
#define PGM_LFX_TRANSLATION 0x26
#define PGM_LSX_TRANSLATION 0x27
#define PGM_ALET_SPECIFICATION 0x28
#define PGM_ALEN_TRANSLATION 0x29
#define PGM_ALE_SEQUENCE 0x2a
#define PGM_ASTE_VALIDITY 0x2b
#define PGM_ASTE_SEQUENCE 0x2c
#define PGM_EXTENDED_AUTHORITY 0x2d
#define PGM_LSTE_SEQUENCE 0x2e
#define PGM_ASTE_INSTANCE 0x2f
#define PGM_STACK_FULL 0x30
#define PGM_STACK_EMPTY 0x31
#define PGM_STACK_SPECIFICATION 0x32
#define PGM_STACK_TYPE 0x33
#define PGM_STACK_OPERATION 0x34
#define PGM_ASCE_TYPE 0x38
#define PGM_REGION_FIRST_TRANS 0x39
#define PGM_REGION_SECOND_TRANS 0x3a
#define PGM_REGION_THIRD_TRANS 0x3b
#define PGM_MONITOR 0x40
#define PGM_PER 0x80
#define PGM_CRYPTO_OPERATION 0x119
KVM: s390: interrupt subsystem, cpu timer, waitpsw This patch contains the s390 interrupt subsystem (similar to in kernel apic) including timer interrupts (similar to in-kernel-pit) and enabled wait (similar to in kernel hlt). In order to achieve that, this patch also introduces intercept handling for instruction intercepts, and it implements load control instructions. This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both the vm file descriptors and the vcpu file descriptors. In case this ioctl is issued against a vm file descriptor, the interrupt is considered floating. Floating interrupts may be delivered to any virtual cpu in the configuration. The following interrupts are supported: SIGP STOP - interprocessor signal that stops a remote cpu SIGP SET PREFIX - interprocessor signal that sets the prefix register of a (stopped) remote cpu INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed and for smp_call_function() in the guest. PROGRAM INT - exception during program execution such as page fault, illegal instruction and friends RESTART - interprocessor signal that starts a stopped cpu INT VIRTIO - floating interrupt for virtio signalisation INT SERVICE - floating interrupt for signalisations from the system service processor struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting an interrupt, also carrys parameter data for interrupts along with the interrupt type. Interrupts on s390 usually have a state that represents the current operation, or identifies which device has caused the interruption on s390. kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a disabled wait (that is, disabled for interrupts), we exit to userspace. In case of an enabled wait we set up a timer that equals the cpu clock comparator value and sleep on a wait queue. [christian: change virtio interrupt to 0x2603] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-26 01:47:26 +08:00
/* irq types in order of priority */
enum irq_types {
IRQ_PEND_MCHK_EX = 0,
IRQ_PEND_SVC,
IRQ_PEND_PROG,
IRQ_PEND_MCHK_REP,
IRQ_PEND_EXT_IRQ_KEY,
IRQ_PEND_EXT_MALFUNC,
IRQ_PEND_EXT_EMERGENCY,
IRQ_PEND_EXT_EXTERNAL,
IRQ_PEND_EXT_CLOCK_COMP,
IRQ_PEND_EXT_CPU_TIMER,
IRQ_PEND_EXT_TIMING,
IRQ_PEND_EXT_SERVICE,
IRQ_PEND_EXT_HOST,
IRQ_PEND_PFAULT_INIT,
IRQ_PEND_PFAULT_DONE,
IRQ_PEND_VIRTIO,
IRQ_PEND_IO_ISC_0,
IRQ_PEND_IO_ISC_1,
IRQ_PEND_IO_ISC_2,
IRQ_PEND_IO_ISC_3,
IRQ_PEND_IO_ISC_4,
IRQ_PEND_IO_ISC_5,
IRQ_PEND_IO_ISC_6,
IRQ_PEND_IO_ISC_7,
IRQ_PEND_SIGP_STOP,
IRQ_PEND_RESTART,
IRQ_PEND_SET_PREFIX,
IRQ_PEND_COUNT
};
/* We have 2M for virtio device descriptor pages. Smallest amount of
* memory per page is 24 bytes (1 queue), so (2048*1024) / 24 = 87381
*/
#define KVM_S390_MAX_VIRTIO_IRQS 87381
/*
* Repressible (non-floating) machine check interrupts
* subclass bits in MCIC
*/
#define MCHK_EXTD_BIT 58
#define MCHK_DEGR_BIT 56
#define MCHK_WARN_BIT 55
#define MCHK_REP_MASK ((1UL << MCHK_DEGR_BIT) | \
(1UL << MCHK_EXTD_BIT) | \
(1UL << MCHK_WARN_BIT))
/* Exigent machine check interrupts subclass bits in MCIC */
#define MCHK_SD_BIT 63
#define MCHK_PD_BIT 62
#define MCHK_EX_MASK ((1UL << MCHK_SD_BIT) | (1UL << MCHK_PD_BIT))
#define IRQ_PEND_EXT_MASK ((1UL << IRQ_PEND_EXT_IRQ_KEY) | \
(1UL << IRQ_PEND_EXT_CLOCK_COMP) | \
(1UL << IRQ_PEND_EXT_CPU_TIMER) | \
(1UL << IRQ_PEND_EXT_MALFUNC) | \
(1UL << IRQ_PEND_EXT_EMERGENCY) | \
(1UL << IRQ_PEND_EXT_EXTERNAL) | \
(1UL << IRQ_PEND_EXT_TIMING) | \
(1UL << IRQ_PEND_EXT_HOST) | \
(1UL << IRQ_PEND_EXT_SERVICE) | \
(1UL << IRQ_PEND_VIRTIO) | \
(1UL << IRQ_PEND_PFAULT_INIT) | \
(1UL << IRQ_PEND_PFAULT_DONE))
#define IRQ_PEND_IO_MASK ((1UL << IRQ_PEND_IO_ISC_0) | \
(1UL << IRQ_PEND_IO_ISC_1) | \
(1UL << IRQ_PEND_IO_ISC_2) | \
(1UL << IRQ_PEND_IO_ISC_3) | \
(1UL << IRQ_PEND_IO_ISC_4) | \
(1UL << IRQ_PEND_IO_ISC_5) | \
(1UL << IRQ_PEND_IO_ISC_6) | \
(1UL << IRQ_PEND_IO_ISC_7))
#define IRQ_PEND_MCHK_MASK ((1UL << IRQ_PEND_MCHK_REP) | \
(1UL << IRQ_PEND_MCHK_EX))
struct kvm_s390_interrupt_info {
KVM: s390: interrupt subsystem, cpu timer, waitpsw This patch contains the s390 interrupt subsystem (similar to in kernel apic) including timer interrupts (similar to in-kernel-pit) and enabled wait (similar to in kernel hlt). In order to achieve that, this patch also introduces intercept handling for instruction intercepts, and it implements load control instructions. This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both the vm file descriptors and the vcpu file descriptors. In case this ioctl is issued against a vm file descriptor, the interrupt is considered floating. Floating interrupts may be delivered to any virtual cpu in the configuration. The following interrupts are supported: SIGP STOP - interprocessor signal that stops a remote cpu SIGP SET PREFIX - interprocessor signal that sets the prefix register of a (stopped) remote cpu INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed and for smp_call_function() in the guest. PROGRAM INT - exception during program execution such as page fault, illegal instruction and friends RESTART - interprocessor signal that starts a stopped cpu INT VIRTIO - floating interrupt for virtio signalisation INT SERVICE - floating interrupt for signalisations from the system service processor struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting an interrupt, also carrys parameter data for interrupts along with the interrupt type. Interrupts on s390 usually have a state that represents the current operation, or identifies which device has caused the interruption on s390. kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a disabled wait (that is, disabled for interrupts), we exit to userspace. In case of an enabled wait we set up a timer that equals the cpu clock comparator value and sleep on a wait queue. [christian: change virtio interrupt to 0x2603] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-26 01:47:26 +08:00
struct list_head list;
u64 type;
union {
struct kvm_s390_io_info io;
struct kvm_s390_ext_info ext;
struct kvm_s390_pgm_info pgm;
struct kvm_s390_emerg_info emerg;
struct kvm_s390_extcall_info extcall;
struct kvm_s390_prefix_info prefix;
struct kvm_s390_stop_info stop;
struct kvm_s390_mchk_info mchk;
KVM: s390: interrupt subsystem, cpu timer, waitpsw This patch contains the s390 interrupt subsystem (similar to in kernel apic) including timer interrupts (similar to in-kernel-pit) and enabled wait (similar to in kernel hlt). In order to achieve that, this patch also introduces intercept handling for instruction intercepts, and it implements load control instructions. This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both the vm file descriptors and the vcpu file descriptors. In case this ioctl is issued against a vm file descriptor, the interrupt is considered floating. Floating interrupts may be delivered to any virtual cpu in the configuration. The following interrupts are supported: SIGP STOP - interprocessor signal that stops a remote cpu SIGP SET PREFIX - interprocessor signal that sets the prefix register of a (stopped) remote cpu INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed and for smp_call_function() in the guest. PROGRAM INT - exception during program execution such as page fault, illegal instruction and friends RESTART - interprocessor signal that starts a stopped cpu INT VIRTIO - floating interrupt for virtio signalisation INT SERVICE - floating interrupt for signalisations from the system service processor struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting an interrupt, also carrys parameter data for interrupts along with the interrupt type. Interrupts on s390 usually have a state that represents the current operation, or identifies which device has caused the interruption on s390. kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a disabled wait (that is, disabled for interrupts), we exit to userspace. In case of an enabled wait we set up a timer that equals the cpu clock comparator value and sleep on a wait queue. [christian: change virtio interrupt to 0x2603] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-26 01:47:26 +08:00
};
};
struct kvm_s390_irq_payload {
struct kvm_s390_io_info io;
struct kvm_s390_ext_info ext;
struct kvm_s390_pgm_info pgm;
struct kvm_s390_emerg_info emerg;
struct kvm_s390_extcall_info extcall;
struct kvm_s390_prefix_info prefix;
struct kvm_s390_stop_info stop;
struct kvm_s390_mchk_info mchk;
};
struct kvm_s390_local_interrupt {
KVM: s390: interrupt subsystem, cpu timer, waitpsw This patch contains the s390 interrupt subsystem (similar to in kernel apic) including timer interrupts (similar to in-kernel-pit) and enabled wait (similar to in kernel hlt). In order to achieve that, this patch also introduces intercept handling for instruction intercepts, and it implements load control instructions. This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both the vm file descriptors and the vcpu file descriptors. In case this ioctl is issued against a vm file descriptor, the interrupt is considered floating. Floating interrupts may be delivered to any virtual cpu in the configuration. The following interrupts are supported: SIGP STOP - interprocessor signal that stops a remote cpu SIGP SET PREFIX - interprocessor signal that sets the prefix register of a (stopped) remote cpu INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed and for smp_call_function() in the guest. PROGRAM INT - exception during program execution such as page fault, illegal instruction and friends RESTART - interprocessor signal that starts a stopped cpu INT VIRTIO - floating interrupt for virtio signalisation INT SERVICE - floating interrupt for signalisations from the system service processor struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting an interrupt, also carrys parameter data for interrupts along with the interrupt type. Interrupts on s390 usually have a state that represents the current operation, or identifies which device has caused the interruption on s390. kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a disabled wait (that is, disabled for interrupts), we exit to userspace. In case of an enabled wait we set up a timer that equals the cpu clock comparator value and sleep on a wait queue. [christian: change virtio interrupt to 0x2603] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-26 01:47:26 +08:00
spinlock_t lock;
struct kvm_s390_float_interrupt *float_int;
KVM: Use simple waitqueue for vcpu->wq The problem: On -rt, an emulated LAPIC timer instances has the following path: 1) hard interrupt 2) ksoftirqd is scheduled 3) ksoftirqd wakes up vcpu thread 4) vcpu thread is scheduled This extra context switch introduces unnecessary latency in the LAPIC path for a KVM guest. The solution: Allow waking up vcpu thread from hardirq context, thus avoiding the need for ksoftirqd to be scheduled. Normal waitqueues make use of spinlocks, which on -RT are sleepable locks. Therefore, waking up a waitqueue waiter involves locking a sleeping lock, which is not allowed from hard interrupt context. cyclictest command line: This patch reduces the average latency in my tests from 14us to 11us. Daniel writes: Paolo asked for numbers from kvm-unit-tests/tscdeadline_latency benchmark on mainline. The test was run 1000 times on tip/sched/core 4.4.0-rc8-01134-g0905f04: ./x86-run x86/tscdeadline_latency.flat -cpu host with idle=poll. The test seems not to deliver really stable numbers though most of them are smaller. Paolo write: "Anything above ~10000 cycles means that the host went to C1 or lower---the number means more or less nothing in that case. The mean shows an improvement indeed." Before: min max mean std count 1000.000000 1000.000000 1000.000000 1000.000000 mean 5162.596000 2019270.084000 5824.491541 20681.645558 std 75.431231 622607.723969 89.575700 6492.272062 min 4466.000000 23928.000000 5537.926500 585.864966 25% 5163.000000 1613252.750000 5790.132275 16683.745433 50% 5175.000000 2281919.000000 5834.654000 23151.990026 75% 5190.000000 2382865.750000 5861.412950 24148.206168 max 5228.000000 4175158.000000 6254.827300 46481.048691 After min max mean std count 1000.000000 1000.00000 1000.000000 1000.000000 mean 5143.511000 2076886.10300 5813.312474 21207.357565 std 77.668322 610413.09583 86.541500 6331.915127 min 4427.000000 25103.00000 5529.756600 559.187707 25% 5148.000000 1691272.75000 5784.889825 17473.518244 50% 5160.000000 2308328.50000 5832.025000 23464.837068 75% 5172.000000 2393037.75000 5853.177675 24223.969976 max 5222.000000 3922458.00000 6186.720500 42520.379830 [Patch was originaly based on the swait implementation found in the -rt tree. Daniel ported it to mainline's version and gathered the benchmark numbers for tscdeadline_latency test.] Signed-off-by: Daniel Wagner <daniel.wagner@bmw-carit.de> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Cc: linux-rt-users@vger.kernel.org Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Steven Rostedt <rostedt@goodmis.org> Cc: Paul Gortmaker <paul.gortmaker@windriver.com> Cc: Paolo Bonzini <pbonzini@redhat.com> Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> Link: http://lkml.kernel.org/r/1455871601-27484-4-git-send-email-wagi@monom.org Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2016-02-19 16:46:39 +08:00
struct swait_queue_head *wq;
atomic_t *cpuflags;
DECLARE_BITMAP(sigp_emerg_pending, KVM_MAX_VCPUS);
struct kvm_s390_irq_payload irq;
unsigned long pending_irqs;
KVM: s390: interrupt subsystem, cpu timer, waitpsw This patch contains the s390 interrupt subsystem (similar to in kernel apic) including timer interrupts (similar to in-kernel-pit) and enabled wait (similar to in kernel hlt). In order to achieve that, this patch also introduces intercept handling for instruction intercepts, and it implements load control instructions. This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both the vm file descriptors and the vcpu file descriptors. In case this ioctl is issued against a vm file descriptor, the interrupt is considered floating. Floating interrupts may be delivered to any virtual cpu in the configuration. The following interrupts are supported: SIGP STOP - interprocessor signal that stops a remote cpu SIGP SET PREFIX - interprocessor signal that sets the prefix register of a (stopped) remote cpu INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed and for smp_call_function() in the guest. PROGRAM INT - exception during program execution such as page fault, illegal instruction and friends RESTART - interprocessor signal that starts a stopped cpu INT VIRTIO - floating interrupt for virtio signalisation INT SERVICE - floating interrupt for signalisations from the system service processor struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting an interrupt, also carrys parameter data for interrupts along with the interrupt type. Interrupts on s390 usually have a state that represents the current operation, or identifies which device has caused the interruption on s390. kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a disabled wait (that is, disabled for interrupts), we exit to userspace. In case of an enabled wait we set up a timer that equals the cpu clock comparator value and sleep on a wait queue. [christian: change virtio interrupt to 0x2603] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-26 01:47:26 +08:00
};
#define FIRQ_LIST_IO_ISC_0 0
#define FIRQ_LIST_IO_ISC_1 1
#define FIRQ_LIST_IO_ISC_2 2
#define FIRQ_LIST_IO_ISC_3 3
#define FIRQ_LIST_IO_ISC_4 4
#define FIRQ_LIST_IO_ISC_5 5
#define FIRQ_LIST_IO_ISC_6 6
#define FIRQ_LIST_IO_ISC_7 7
#define FIRQ_LIST_PFAULT 8
#define FIRQ_LIST_VIRTIO 9
#define FIRQ_LIST_COUNT 10
#define FIRQ_CNTR_IO 0
#define FIRQ_CNTR_SERVICE 1
#define FIRQ_CNTR_VIRTIO 2
#define FIRQ_CNTR_PFAULT 3
#define FIRQ_MAX_COUNT 4
struct kvm_s390_float_interrupt {
unsigned long pending_irqs;
KVM: s390: interrupt subsystem, cpu timer, waitpsw This patch contains the s390 interrupt subsystem (similar to in kernel apic) including timer interrupts (similar to in-kernel-pit) and enabled wait (similar to in kernel hlt). In order to achieve that, this patch also introduces intercept handling for instruction intercepts, and it implements load control instructions. This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both the vm file descriptors and the vcpu file descriptors. In case this ioctl is issued against a vm file descriptor, the interrupt is considered floating. Floating interrupts may be delivered to any virtual cpu in the configuration. The following interrupts are supported: SIGP STOP - interprocessor signal that stops a remote cpu SIGP SET PREFIX - interprocessor signal that sets the prefix register of a (stopped) remote cpu INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed and for smp_call_function() in the guest. PROGRAM INT - exception during program execution such as page fault, illegal instruction and friends RESTART - interprocessor signal that starts a stopped cpu INT VIRTIO - floating interrupt for virtio signalisation INT SERVICE - floating interrupt for signalisations from the system service processor struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting an interrupt, also carrys parameter data for interrupts along with the interrupt type. Interrupts on s390 usually have a state that represents the current operation, or identifies which device has caused the interruption on s390. kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a disabled wait (that is, disabled for interrupts), we exit to userspace. In case of an enabled wait we set up a timer that equals the cpu clock comparator value and sleep on a wait queue. [christian: change virtio interrupt to 0x2603] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-26 01:47:26 +08:00
spinlock_t lock;
struct list_head lists[FIRQ_LIST_COUNT];
int counters[FIRQ_MAX_COUNT];
struct kvm_s390_mchk_info mchk;
struct kvm_s390_ext_info srv_signal;
KVM: s390: interrupt subsystem, cpu timer, waitpsw This patch contains the s390 interrupt subsystem (similar to in kernel apic) including timer interrupts (similar to in-kernel-pit) and enabled wait (similar to in kernel hlt). In order to achieve that, this patch also introduces intercept handling for instruction intercepts, and it implements load control instructions. This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both the vm file descriptors and the vcpu file descriptors. In case this ioctl is issued against a vm file descriptor, the interrupt is considered floating. Floating interrupts may be delivered to any virtual cpu in the configuration. The following interrupts are supported: SIGP STOP - interprocessor signal that stops a remote cpu SIGP SET PREFIX - interprocessor signal that sets the prefix register of a (stopped) remote cpu INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed and for smp_call_function() in the guest. PROGRAM INT - exception during program execution such as page fault, illegal instruction and friends RESTART - interprocessor signal that starts a stopped cpu INT VIRTIO - floating interrupt for virtio signalisation INT SERVICE - floating interrupt for signalisations from the system service processor struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting an interrupt, also carrys parameter data for interrupts along with the interrupt type. Interrupts on s390 usually have a state that represents the current operation, or identifies which device has caused the interruption on s390. kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a disabled wait (that is, disabled for interrupts), we exit to userspace. In case of an enabled wait we set up a timer that equals the cpu clock comparator value and sleep on a wait queue. [christian: change virtio interrupt to 0x2603] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-26 01:47:26 +08:00
int next_rr_cpu;
unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
KVM: s390: interrupt subsystem, cpu timer, waitpsw This patch contains the s390 interrupt subsystem (similar to in kernel apic) including timer interrupts (similar to in-kernel-pit) and enabled wait (similar to in kernel hlt). In order to achieve that, this patch also introduces intercept handling for instruction intercepts, and it implements load control instructions. This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both the vm file descriptors and the vcpu file descriptors. In case this ioctl is issued against a vm file descriptor, the interrupt is considered floating. Floating interrupts may be delivered to any virtual cpu in the configuration. The following interrupts are supported: SIGP STOP - interprocessor signal that stops a remote cpu SIGP SET PREFIX - interprocessor signal that sets the prefix register of a (stopped) remote cpu INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed and for smp_call_function() in the guest. PROGRAM INT - exception during program execution such as page fault, illegal instruction and friends RESTART - interprocessor signal that starts a stopped cpu INT VIRTIO - floating interrupt for virtio signalisation INT SERVICE - floating interrupt for signalisations from the system service processor struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting an interrupt, also carrys parameter data for interrupts along with the interrupt type. Interrupts on s390 usually have a state that represents the current operation, or identifies which device has caused the interruption on s390. kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a disabled wait (that is, disabled for interrupts), we exit to userspace. In case of an enabled wait we set up a timer that equals the cpu clock comparator value and sleep on a wait queue. [christian: change virtio interrupt to 0x2603] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-26 01:47:26 +08:00
};
struct kvm_hw_wp_info_arch {
unsigned long addr;
unsigned long phys_addr;
int len;
char *old_data;
};
struct kvm_hw_bp_info_arch {
unsigned long addr;
int len;
};
/*
* Only the upper 16 bits of kvm_guest_debug->control are arch specific.
* Further KVM_GUESTDBG flags which an be used from userspace can be found in
* arch/s390/include/uapi/asm/kvm.h
*/
#define KVM_GUESTDBG_EXIT_PENDING 0x10000000
#define guestdbg_enabled(vcpu) \
(vcpu->guest_debug & KVM_GUESTDBG_ENABLE)
#define guestdbg_sstep_enabled(vcpu) \
(vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
#define guestdbg_hw_bp_enabled(vcpu) \
(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
#define guestdbg_exit_pending(vcpu) (guestdbg_enabled(vcpu) && \
(vcpu->guest_debug & KVM_GUESTDBG_EXIT_PENDING))
struct kvm_guestdbg_info_arch {
unsigned long cr0;
unsigned long cr9;
unsigned long cr10;
unsigned long cr11;
struct kvm_hw_bp_info_arch *hw_bp_info;
struct kvm_hw_wp_info_arch *hw_wp_info;
int nr_hw_bp;
int nr_hw_wp;
unsigned long last_bp;
};
KVM: s390: interrupt subsystem, cpu timer, waitpsw This patch contains the s390 interrupt subsystem (similar to in kernel apic) including timer interrupts (similar to in-kernel-pit) and enabled wait (similar to in kernel hlt). In order to achieve that, this patch also introduces intercept handling for instruction intercepts, and it implements load control instructions. This patch introduces an ioctl KVM_S390_INTERRUPT which is valid for both the vm file descriptors and the vcpu file descriptors. In case this ioctl is issued against a vm file descriptor, the interrupt is considered floating. Floating interrupts may be delivered to any virtual cpu in the configuration. The following interrupts are supported: SIGP STOP - interprocessor signal that stops a remote cpu SIGP SET PREFIX - interprocessor signal that sets the prefix register of a (stopped) remote cpu INT EMERGENCY - interprocessor interrupt, usually used to signal need_reshed and for smp_call_function() in the guest. PROGRAM INT - exception during program execution such as page fault, illegal instruction and friends RESTART - interprocessor signal that starts a stopped cpu INT VIRTIO - floating interrupt for virtio signalisation INT SERVICE - floating interrupt for signalisations from the system service processor struct kvm_s390_interrupt, which is submitted as ioctl parameter when injecting an interrupt, also carrys parameter data for interrupts along with the interrupt type. Interrupts on s390 usually have a state that represents the current operation, or identifies which device has caused the interruption on s390. kvm_s390_handle_wait() does handle waitpsw in two flavors: in case of a disabled wait (that is, disabled for interrupts), we exit to userspace. In case of an enabled wait we set up a timer that equals the cpu clock comparator value and sleep on a wait queue. [christian: change virtio interrupt to 0x2603] Acked-by: Martin Schwidefsky <schwidefsky@de.ibm.com> Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Carsten Otte <cotte@de.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: Avi Kivity <avi@qumranet.com>
2008-03-26 01:47:26 +08:00
struct kvm_vcpu_arch {
struct kvm_s390_sie_block *sie_block;
unsigned int host_acrs[NUM_ACRS];
s390/kernel: lazy restore fpu registers Improve the save and restore behavior of FPU register contents to use the vector extension within the kernel. The kernel does not use floating-point or vector registers and, therefore, saving and restoring the FPU register contents are performed for handling signals or switching processes only. To prepare for using vector instructions and vector registers within the kernel, enhance the save behavior and implement a lazy restore at return to user space from a system call or interrupt. To implement the lazy restore, the save_fpu_regs() sets a CPU information flag, CIF_FPU, to indicate that the FPU registers must be restored. Saving and setting CIF_FPU is performed in an atomic fashion to be interrupt-safe. When the kernel wants to use the vector extension or wants to change the FPU register state for a task during signal handling, the save_fpu_regs() must be called first. The CIF_FPU flag is also set at process switch. At return to user space, the FPU state is restored. In particular, the FPU state includes the floating-point or vector register contents, as well as, vector-enablement and floating-point control. The FPU state restore and clearing CIF_FPU is also performed in an atomic fashion. For KVM, the restore of the FPU register state is performed when restoring the general-purpose guest registers before the SIE instructions is started. Because the path towards the SIE instruction is interruptible, the CIF_FPU flag must be checked again right before going into SIE. If set, the guest registers must be reloaded again by re-entering the outer SIE loop. This is the same behavior as if the SIE critical section is interrupted. Signed-off-by: Hendrik Brueckner <brueckner@linux.vnet.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
2015-06-10 18:53:42 +08:00
struct fpu host_fpregs;
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
struct kvm_s390_pgm_info pgm;
struct gmap *gmap;
/* backup location for the currently enabled gmap when scheduled out */
struct gmap *enabled_gmap;
struct kvm_guestdbg_info_arch guestdbg;
unsigned long pfault_token;
unsigned long pfault_select;
unsigned long pfault_compare;
bool cputm_enabled;
KVM: s390: protect VCPU cpu timer with a seqcount For now, only the owning VCPU thread (that has loaded the VCPU) can get a consistent cpu timer value when calculating the delta. However, other threads might also be interested in a more recent, consistent value. Of special interest will be the timer callback of a VCPU that executes without having the VCPU loaded and could run in parallel with the VCPU thread. The cpu timer has a nice property: it is only updated by the owning VCPU thread. And speaking about accounting, a consistent value can only be calculated by looking at cputm_start and the cpu timer itself in one shot, otherwise the result might be wrong. As we only have one writing thread at a time (owning VCPU thread), we can use a seqcount instead of a seqlock and retry if the VCPU refreshed its cpu timer. This avoids any heavy locking and only introduces a counter update/check plus a handful of smp_wmb(). The owning VCPU thread should never have to retry on reads, and also for other threads this might be a very rare scenario. Please note that we have to use the raw_* variants for locking the seqcount as lockdep will produce false warnings otherwise. The rq->lock held during vcpu_load/put is also acquired from hardirq context. Lockdep cannot know that we avoid potential deadlocks by disabling preemption and thereby disable concurrent write locking attempts (via vcpu_put/load). Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com> Signed-off-by: David Hildenbrand <dahi@linux.vnet.ibm.com> Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
2016-02-18 04:53:33 +08:00
/*
* The seqcount protects updates to cputm_start and sie_block.cputm,
* this way we can have non-blocking reads with consistent values.
* Only the owning VCPU thread (vcpu->cpu) is allowed to change these
* values and to start/stop/enable/disable cpu timer accounting.
*/
seqcount_t cputm_seqcount;
__u64 cputm_start;
};
struct kvm_vm_stat {
u32 remote_tlb_flush;
};
struct kvm_arch_memory_slot {
};
struct s390_map_info {
struct list_head list;
__u64 guest_addr;
__u64 addr;
struct page *page;
};
struct s390_io_adapter {
unsigned int id;
int isc;
bool maskable;
bool masked;
bool swap;
struct rw_semaphore maps_lock;
struct list_head maps;
atomic_t nr_maps;
};
#define MAX_S390_IO_ADAPTERS ((MAX_ISC + 1) * 8)
#define MAX_S390_ADAPTER_MAPS 256
/* maximum size of facilities and facility mask is 2k bytes */
#define S390_ARCH_FAC_LIST_SIZE_BYTE (1<<11)
#define S390_ARCH_FAC_LIST_SIZE_U64 \
(S390_ARCH_FAC_LIST_SIZE_BYTE / sizeof(u64))
#define S390_ARCH_FAC_MASK_SIZE_BYTE S390_ARCH_FAC_LIST_SIZE_BYTE
#define S390_ARCH_FAC_MASK_SIZE_U64 \
(S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64))
struct kvm_s390_cpu_model {
/* facility mask supported by kvm & hosting machine */
__u64 fac_mask[S390_ARCH_FAC_LIST_SIZE_U64];
/* facility list requested by guest (in dma page) */
__u64 *fac_list;
u64 cpuid;
unsigned short ibc;
};
struct kvm_s390_crypto {
struct kvm_s390_crypto_cb *crycb;
__u32 crycbd;
__u8 aes_kw;
__u8 dea_kw;
};
struct kvm_s390_crypto_cb {
__u8 reserved00[72]; /* 0x0000 */
__u8 dea_wrapping_key_mask[24]; /* 0x0048 */
__u8 aes_wrapping_key_mask[32]; /* 0x0060 */
__u8 reserved80[128]; /* 0x0080 */
};
/*
* sie_page2 has to be allocated as DMA because fac_list and crycb need
* 31bit addresses in the sie control block.
*/
struct sie_page2 {
__u64 fac_list[S390_ARCH_FAC_LIST_SIZE_U64]; /* 0x0000 */
struct kvm_s390_crypto_cb crycb; /* 0x0800 */
u8 reserved900[0x1000 - 0x900]; /* 0x0900 */
} __packed;
struct kvm_arch{
void *sca;
int use_esca;
rwlock_t sca_lock;
debug_info_t *dbf;
struct kvm_s390_float_interrupt float_int;
struct kvm_device *flic;
struct gmap *gmap;
unsigned long mem_limit;
int css_support;
int use_irqchip;
int use_cmma;
int user_cpu_state_ctrl;
int user_sigp;
int user_stsi;
struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
wait_queue_head_t ipte_wq;
int ipte_lock_count;
struct mutex ipte_mutex;
struct ratelimit_state sthyi_limit;
spinlock_t start_stop_lock;
struct sie_page2 *sie_page2;
struct kvm_s390_cpu_model model;
struct kvm_s390_crypto crypto;
u64 epoch;
/* subset of available cpu features enabled by user space */
DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
};
#define KVM_HVA_ERR_BAD (-1UL)
#define KVM_HVA_ERR_RO_BAD (-2UL)
static inline bool kvm_is_error_hva(unsigned long addr)
{
return IS_ERR_VALUE(addr);
}
#define ASYNC_PF_PER_VCPU 64
struct kvm_arch_async_pf {
unsigned long pfault_token;
};
bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu);
void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work);
extern int sie64a(struct kvm_s390_sie_block *, u64 *);
extern char sie_exit;
static inline void kvm_arch_hardware_disable(void) {}
static inline void kvm_arch_check_processor_compat(void *rtn) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {}
static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {}
static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot) {}
static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
KVM: halt_polling: provide a way to qualify wakeups during poll Some wakeups should not be considered a sucessful poll. For example on s390 I/O interrupts are usually floating, which means that _ALL_ CPUs would be considered runnable - letting all vCPUs poll all the time for transactional like workload, even if one vCPU would be enough. This can result in huge CPU usage for large guests. This patch lets architectures provide a way to qualify wakeups if they should be considered a good/bad wakeups in regard to polls. For s390 the implementation will fence of halt polling for anything but known good, single vCPU events. The s390 implementation for floating interrupts does a wakeup for one vCPU, but the interrupt will be delivered by whatever CPU checks first for a pending interrupt. We prefer the woken up CPU by marking the poll of this CPU as "good" poll. This code will also mark several other wakeup reasons like IPI or expired timers as "good". This will of course also mark some events as not sucessful. As KVM on z runs always as a 2nd level hypervisor, we prefer to not poll, unless we are really sure, though. This patch successfully limits the CPU usage for cases like uperf 1byte transactional ping pong workload or wakeup heavy workload like OLTP while still providing a proper speedup. This also introduced a new vcpu stat "halt_poll_no_tuning" that marks wakeups that are considered not good for polling. Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com> Acked-by: Radim Krčmář <rkrcmar@redhat.com> (for an earlier version) Cc: David Matlack <dmatlack@google.com> Cc: Wanpeng Li <kernellwp@gmail.com> [Rename config symbol. - Paolo] Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-05-13 18:16:35 +08:00
void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu);
#endif