From 8e03c100a7aeb268a7ebfdd98298591a63df0626 Mon Sep 17 00:00:00 2001
From: Stefan Weil <sw@weilnetz.de>
Date: Thu, 20 Mar 2014 22:30:32 +0100
Subject: [PATCH 01/15] target-i386: Remove unused data from local array

Signed-off-by: Stefan Weil <sw@weilnetz.de>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/kvm.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 4389959f61..d17eea3186 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -130,14 +130,13 @@ static const struct kvm_para_features {
     { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY },
     { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP },
     { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF },
-    { -1, -1 }
 };
 
 static int get_para_features(KVMState *s)
 {
     int i, features = 0;
 
-    for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) {
+    for (i = 0; i < ARRAY_SIZE(para_features); i++) {
         if (kvm_check_extension(s, para_features[i].cap)) {
             features |= (1 << para_features[i].feature);
         }

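A note on the pattern above, not part of the patch: the { -1, -1 } entry existed only as a terminator so the loop could stop one element early, while ARRAY_SIZE() already yields the exact element count. A standalone sketch of the resulting style, with made-up table contents:

    #include <stdio.h>

    #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

    /* Toy stand-in for para_features[]: no sentinel entry needed. */
    static const struct { int cap; int feature; } table[] = {
        { 1, 10 },
        { 2, 20 },
        { 3, 30 },
    };

    int main(void)
    {
        size_t i;

        /* Iterate over exactly ARRAY_SIZE(table) real entries. */
        for (i = 0; i < ARRAY_SIZE(table); i++) {
            printf("cap %d -> feature %d\n", table[i].cap, table[i].feature);
        }
        return 0;
    }
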
From ada4135f84adcacd9294269e22113460650ae1ab Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cornelia.huck@de.ibm.com>
Date: Fri, 9 May 2014 10:06:46 +0200
Subject: [PATCH 02/15] kvm: make one_reg helpers available for everyone

s390x introduced helper functions for getting/setting one_regs with
commit 860643bc. However, nothing about these is s390-specific.

Alexey Kardashevskiy had already posted a general version, so let's
merge the two patches and massage the code a bit.

CC: Alexey Kardashevskiy <aik@ozlabs.ru>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/sysemu/kvm.h | 20 ++++++++++++++++++++
 kvm-all.c            | 28 ++++++++++++++++++++++++++++
 target-s390x/kvm.c   | 29 -----------------------------
 trace-events         |  6 ++----
 4 files changed, 50 insertions(+), 33 deletions(-)

diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index 5ad4e0e1e2..a6c2823352 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -383,4 +383,24 @@ void kvm_init_irq_routing(KVMState *s);
  *          > 0: irq chip was created
  */
 int kvm_arch_irqchip_create(KVMState *s);
+
+/**
+ * kvm_set_one_reg - set a register value in KVM via KVM_SET_ONE_REG ioctl
+ * @id: The register ID
+ * @source: The pointer to the value to be set. It must point to a variable
+ *          of the correct type/size for the register being accessed.
+ *
+ * Returns: 0 on success, or a negative errno on failure.
+ */
+int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source);
+
+/**
+ * kvm_get_one_reg - get a register value from KVM via KVM_GET_ONE_REG ioctl
+ * @id: The register ID
+ * @target: The pointer where the value is to be stored. It must point to a
+ *          variable of the correct type/size for the register being accessed.
+ *
+ * Returns: 0 on success, or a negative errno on failure.
+ */
+int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target);
 #endif
diff --git a/kvm-all.c b/kvm-all.c
index 5cb7f26f4f..94520e5915 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -2114,3 +2114,31 @@ int kvm_create_device(KVMState *s, uint64_t type, bool test)
 
     return test ? 0 : create_dev.fd;
 }
+
+int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source)
+{
+    struct kvm_one_reg reg;
+    int r;
+
+    reg.id = id;
+    reg.addr = (uintptr_t) source;
+    r = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
+    if (r) {
+        trace_kvm_failed_reg_set(id, strerror(r));
+    }
+    return r;
+}
+
+int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target)
+{
+    struct kvm_one_reg reg;
+    int r;
+
+    reg.id = id;
+    reg.addr = (uintptr_t) target;
+    r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
+    if (r) {
+        trace_kvm_failed_reg_get(id, strerror(r));
+    }
+    return r;
+}
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index b7b0edc4f1..ba2dffe3dd 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -129,35 +129,6 @@ void kvm_arch_reset_vcpu(CPUState *cpu)
     }
 }
 
-static int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source)
-{
-    struct kvm_one_reg reg;
-    int r;
-
-    reg.id = id;
-    reg.addr = (uint64_t) source;
-    r = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
-    if (r) {
-        trace_kvm_failed_reg_set(id, strerror(errno));
-    }
-    return r;
-}
-
-static int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target)
-{
-    struct kvm_one_reg reg;
-    int r;
-
-    reg.id = id;
-    reg.addr = (uint64_t) target;
-    r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
-    if (r) {
-        trace_kvm_failed_reg_get(id, strerror(errno));
-    }
-    return r;
-}
-
-
 int kvm_arch_put_registers(CPUState *cs, int level)
 {
     S390CPU *cpu = S390_CPU(cs);
diff --git a/trace-events b/trace-events
index af4449d2ba..2c5b30763e 100644
--- a/trace-events
+++ b/trace-events
@@ -1230,6 +1230,8 @@ kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d"
 kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p"
 kvm_failed_spr_set(int str, const char *msg) "Warning: Unable to set SPR %d to KVM: %s"
 kvm_failed_spr_get(int str, const char *msg) "Warning: Unable to retrieve SPR %d from KVM: %s"
+kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
+kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"
 
 # memory.c
 memory_region_ops_read(void *mr, uint64_t addr, uint64_t value, unsigned size) "mr %p addr %#"PRIx64" value %#"PRIx64" size %u"
@@ -1246,7 +1248,3 @@ xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (ad
 # hw/pci/pci_host.c
 pci_cfg_read(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x -> 0x%x"
 pci_cfg_write(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x <- 0x%x"
-
-# target-s390/kvm.c
-kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s"
-kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"

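Since the helpers are now part of the generic KVM API, any target can call them directly. A minimal sketch of how an architecture backend's kvm.c might wrap them for a 64-bit register; the example_* helper names and reg_id are hypothetical, only kvm_set_one_reg()/kvm_get_one_reg() come from this patch, and the fragment is not compilable on its own:

    #include "sysemu/kvm.h"   /* declares kvm_set_one_reg()/kvm_get_one_reg() */

    /* Hypothetical wrappers living in a target's kvm.c. */
    static int example_put_reg64(CPUState *cs, uint64_t reg_id, uint64_t *val)
    {
        /* val must point at storage of the size encoded in reg_id. */
        return kvm_set_one_reg(cs, reg_id, val);
    }

    static int example_get_reg64(CPUState *cs, uint64_t reg_id, uint64_t *val)
    {
        return kvm_get_one_reg(cs, reg_id, val);
    }
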
From ee11f7a8227bcd9cb8d8141fa8f6a0352f7c0c68 Mon Sep 17 00:00:00 2001
From: Gonglei <arei.gonglei@huawei.com>
Date: Thu, 3 Apr 2014 13:18:23 +0800
Subject: [PATCH 03/15] pci-assign: Fix a bug when mapping MSI-X table memory
 fails

When mmapping memory for the MSI-X table fails, dev->msix_table is
not set to NULL, and assigned_dev_unregister_msix_mmio() will then
cause a segfault when it tries to munmap it.

Signed-off-by: Gonglei Arei <arei.gonglei@huawei.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>

Signed-off-by: Gonglei <arei.gonglei@huawei.com>
---
 hw/i386/kvm/pci-assign.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index e55421adcd..0572821f21 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -1664,6 +1664,7 @@ static void assigned_dev_register_msix_mmio(AssignedDevice *dev, Error **errp)
                            MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
     if (dev->msix_table == MAP_FAILED) {
         error_setg_errno(errp, errno, "failed to allocate msix_table");
+        dev->msix_table = NULL;
         return;
     }
 

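The underlying pitfall is generic: mmap() signals failure with MAP_FAILED, not NULL, so a pointer left at MAP_FAILED must never reach munmap() on the cleanup path. A standalone sketch of the guard the fix establishes, with made-up names:

    #include <stdio.h>
    #include <sys/mman.h>

    static void *table;              /* analogous to dev->msix_table */

    static int map_table(size_t len)
    {
        table = mmap(NULL, len, PROT_READ | PROT_WRITE,
                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
        if (table == MAP_FAILED) {
            perror("mmap");
            table = NULL;            /* let the cleanup path test for it */
            return -1;
        }
        return 0;
    }

    static void unmap_table(size_t len)
    {
        if (table) {                 /* safe even if the mapping failed */
            munmap(table, len);
            table = NULL;
        }
    }

    int main(void)
    {
        if (map_table(4096) == 0) {
            unmap_table(4096);
        }
        return 0;
    }
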
From 639973a4740f38789057744b550df3a175bc49ad Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Mon, 28 Apr 2014 17:02:21 +0300
Subject: [PATCH 04/15] pci-assign: limit # of msix vectors

KVM only supports an MSI-X table size of up to 256 vectors,
but some assigned devices support more vectors; at the moment,
attempts to assign them fail with EINVAL.

Tweak the MSI-X capability exposed to the guest to limit the table
size to a supported value.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Tested-by: Gonglei <arei.gonglei@huawei.com>
Cc: qemu-stable@nongnu.org
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/i386/kvm/pci-assign.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c
index 0572821f21..de33657563 100644
--- a/hw/i386/kvm/pci-assign.c
+++ b/hw/i386/kvm/pci-assign.c
@@ -1300,6 +1300,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp)
     if (pos != 0 && kvm_device_msix_supported(kvm_state)) {
         int bar_nr;
         uint32_t msix_table_entry;
+        uint16_t msix_max;
 
         verify_irqchip_in_kernel(&local_err);
         if (local_err) {
@@ -1315,9 +1316,10 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp)
         }
         pci_dev->msix_cap = pos;
 
-        pci_set_word(pci_dev->config + pos + PCI_MSIX_FLAGS,
-                     pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS) &
-                     PCI_MSIX_FLAGS_QSIZE);
+        msix_max = (pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS) &
+                    PCI_MSIX_FLAGS_QSIZE) + 1;
+        msix_max = MIN(msix_max, KVM_MAX_MSIX_PER_DEV);
+        pci_set_word(pci_dev->config + pos + PCI_MSIX_FLAGS, msix_max - 1);
 
         /* Only enable and function mask bits are writable */
         pci_set_word(pci_dev->wmask + pos + PCI_MSIX_FLAGS,
@@ -1327,9 +1329,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp)
         bar_nr = msix_table_entry & PCI_MSIX_FLAGS_BIRMASK;
         msix_table_entry &= ~PCI_MSIX_FLAGS_BIRMASK;
         dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry;
-        dev->msix_max = pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS);
-        dev->msix_max &= PCI_MSIX_FLAGS_QSIZE;
-        dev->msix_max += 1;
+        dev->msix_max = msix_max;
     }
 
     /* Minimal PM support, nothing writable, device appears to NAK changes */

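Background for the arithmetic above: the MSI-X Message Control QSIZE field encodes the table size minus one, which is why the code adds 1 before clamping and writes back the clamped value minus 1. A standalone sketch with a made-up Message Control value (the 256-vector limit comes from the commit message):

    #include <stdint.h>
    #include <stdio.h>

    #define PCI_MSIX_FLAGS_QSIZE   0x07ff   /* encodes table size - 1 */
    #define KVM_MAX_MSIX_PER_DEV   256
    #define MIN(a, b)              ((a) < (b) ? (a) : (b))

    int main(void)
    {
        uint16_t msg_ctl = 0x01ff;  /* hypothetical: QSIZE 511 -> 512 vectors */
        uint16_t msix_max = (msg_ctl & PCI_MSIX_FLAGS_QSIZE) + 1;

        msix_max = MIN(msix_max, KVM_MAX_MSIX_PER_DEV);

        /* The guest now sees at most 256 vectors in the QSIZE field. */
        printf("vectors exposed: %d (QSIZE field 0x%03x)\n",
               msix_max, (unsigned)(msix_max - 1));
        return 0;
    }
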
From 304520291aac95d6a45a3f369f5275e11ef15b2e Mon Sep 17 00:00:00 2001
From: Kevin O'Connor <kevin@koconnor.net>
Date: Tue, 29 Apr 2014 16:37:50 -0400
Subject: [PATCH 05/15] target-i386: set eflags prior to calling
 svm_load_seg_cache() in svm_helper.c

The svm_load_seg_cache() function calls cpu_x86_load_seg_cache() which
inspects env->eflags.  So, make sure all changes to eflags are done
prior to loading the segment cache.

Signed-off-by: Kevin O'Connor <kevin@koconnor.net>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/svm_helper.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/target-i386/svm_helper.c b/target-i386/svm_helper.c
index aa17ecdece..848a4b9985 100644
--- a/target-i386/svm_helper.c
+++ b/target-i386/svm_helper.c
@@ -703,7 +703,8 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1)
     cpu_load_eflags(env, ldq_phys(cs->as,
                                   env->vm_hsave + offsetof(struct vmcb,
                                                            save.rflags)),
-                    ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK));
+                    ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK |
+                      VM_MASK));
     CC_OP = CC_OP_EFLAGS;
 
     svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.es),
@@ -756,10 +757,6 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1)
        from the page table indicated the host's CR3. If the PDPEs contain
        illegal state, the processor causes a shutdown. */
 
-    /* Forces CR0.PE = 1, RFLAGS.VM = 0. */
-    env->cr[0] |= CR0_PE_MASK;
-    env->eflags &= ~VM_MASK;
-
     /* Disables all breakpoints in the host DR7 register. */
 
     /* Checks the reloaded host state for consistency. */

From 010e639a8dd3848c7994aae288c218c6ea52a819 Mon Sep 17 00:00:00 2001
From: Kevin O'Connor <kevin@koconnor.net>
Date: Tue, 29 Apr 2014 16:38:10 -0400
Subject: [PATCH 06/15] target-i386: set eflags and cr0 prior to calling
 cpu_x86_load_seg_cache() in smm_helper.c

The cpu_x86_load_seg_cache() function inspects cr0 and eflags, so make
sure all changes to eflags and cr0 are done prior to loading the
segment caches.

Signed-off-by: Kevin O'Connor <kevin@koconnor.net>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/smm_helper.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c
index 35901c9e89..4841d53b24 100644
--- a/target-i386/smm_helper.c
+++ b/target-i386/smm_helper.c
@@ -163,6 +163,13 @@ void do_smm_enter(X86CPU *cpu)
     cpu_load_eflags(env, 0, ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C |
                               DF_MASK));
     env->eip = 0x00008000;
+    cpu_x86_update_cr0(env,
+                       env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK |
+                                      CR0_PG_MASK));
+    cpu_x86_update_cr4(env, 0);
+    env->dr[7] = 0x00000400;
+    CC_OP = CC_OP_EFLAGS;
+
     cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase,
                            0xffffffff, 0);
     cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0);
@@ -170,13 +177,6 @@ void do_smm_enter(X86CPU *cpu)
     cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0);
     cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0);
     cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0);
-
-    cpu_x86_update_cr0(env,
-                       env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK |
-                                      CR0_PG_MASK));
-    cpu_x86_update_cr4(env, 0);
-    env->dr[7] = 0x00000400;
-    CC_OP = CC_OP_EFLAGS;
 }
 
 void helper_rsm(CPUX86State *env)
@@ -191,16 +191,6 @@ void helper_rsm(CPUX86State *env)
 #ifdef TARGET_X86_64
     cpu_load_efer(env, ldq_phys(cs->as, sm_state + 0x7ed0));
 
-    for (i = 0; i < 6; i++) {
-        offset = 0x7e00 + i * 16;
-        cpu_x86_load_seg_cache(env, i,
-                               lduw_phys(cs->as, sm_state + offset),
-                               ldq_phys(cs->as, sm_state + offset + 8),
-                               ldl_phys(cs->as, sm_state + offset + 4),
-                               (lduw_phys(cs->as, sm_state + offset + 2) &
-                                0xf0ff) << 8);
-    }
-
     env->gdt.base = ldq_phys(cs->as, sm_state + 0x7e68);
     env->gdt.limit = ldl_phys(cs->as, sm_state + 0x7e64);
 
@@ -238,6 +228,16 @@ void helper_rsm(CPUX86State *env)
     cpu_x86_update_cr3(env, ldl_phys(cs->as, sm_state + 0x7f50));
     cpu_x86_update_cr0(env, ldl_phys(cs->as, sm_state + 0x7f58));
 
+    for (i = 0; i < 6; i++) {
+        offset = 0x7e00 + i * 16;
+        cpu_x86_load_seg_cache(env, i,
+                               lduw_phys(cs->as, sm_state + offset),
+                               ldq_phys(cs->as, sm_state + offset + 8),
+                               ldl_phys(cs->as, sm_state + offset + 4),
+                               (lduw_phys(cs->as, sm_state + offset + 2) &
+                                0xf0ff) << 8);
+    }
+
     val = ldl_phys(cs->as, sm_state + 0x7efc); /* revision ID */
     if (val & 0x20000) {
         env->smbase = ldl_phys(cs->as, sm_state + 0x7f00) & ~0x7fff;

From fd460606fd6f356ddcf424558ed67664e8d46eb4 Mon Sep 17 00:00:00 2001
From: Kevin O'Connor <kevin@koconnor.net>
Date: Tue, 29 Apr 2014 16:38:31 -0400
Subject: [PATCH 07/15] target-i386: set eflags prior to calling
 cpu_x86_load_seg_cache() in seg_helper.c

The cpu_x86_load_seg_cache() function inspects eflags, so make sure
all changes to eflags are done prior to loading the segment caches.

Signed-off-by: Kevin O'Connor <kevin@koconnor.net>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/seg_helper.c | 38 +++++++++++++++++++-------------------
 1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index 8c3f92c22b..2e0b113bd9 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c
@@ -739,6 +739,12 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
         }
     }
 
+    /* interrupt gate clear IF mask */
+    if ((type & 1) == 0) {
+        env->eflags &= ~IF_MASK;
+    }
+    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
+
     if (new_stack) {
         if (env->eflags & VM_MASK) {
             cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0);
@@ -759,12 +765,6 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
                    e2);
     cpu_x86_set_cpl(env, dpl);
     env->eip = offset;
-
-    /* interrupt gate clear IF mask */
-    if ((type & 1) == 0) {
-        env->eflags &= ~IF_MASK;
-    }
-    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
 }
 
 #ifdef TARGET_X86_64
@@ -911,6 +911,12 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int,
         PUSHQ(esp, error_code);
     }
 
+    /* interrupt gate clear IF mask */
+    if ((type & 1) == 0) {
+        env->eflags &= ~IF_MASK;
+    }
+    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
+
     if (new_stack) {
         ss = 0 | dpl;
         cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, 0);
@@ -924,12 +930,6 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int,
                    e2);
     cpu_x86_set_cpl(env, dpl);
     env->eip = offset;
-
-    /* interrupt gate clear IF mask */
-    if ((type & 1) == 0) {
-        env->eflags &= ~IF_MASK;
-    }
-    env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK);
 }
 #endif
 
@@ -960,6 +960,8 @@ void helper_syscall(CPUX86State *env, int next_eip_addend)
 
         code64 = env->hflags & HF_CS64_MASK;
 
+        env->eflags &= ~env->fmask;
+        cpu_load_eflags(env, env->eflags, 0);
         cpu_x86_set_cpl(env, 0);
         cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
                            0, 0xffffffff,
@@ -972,8 +974,6 @@ void helper_syscall(CPUX86State *env, int next_eip_addend)
                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                                DESC_S_MASK |
                                DESC_W_MASK | DESC_A_MASK);
-        env->eflags &= ~env->fmask;
-        cpu_load_eflags(env, env->eflags, 0);
         if (code64) {
             env->eip = env->lstar;
         } else {
@@ -982,6 +982,7 @@ void helper_syscall(CPUX86State *env, int next_eip_addend)
     } else {
         env->regs[R_ECX] = (uint32_t)(env->eip + next_eip_addend);
 
+        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
         cpu_x86_set_cpl(env, 0);
         cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
                            0, 0xffffffff,
@@ -993,7 +994,6 @@ void helper_syscall(CPUX86State *env, int next_eip_addend)
                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                                DESC_S_MASK |
                                DESC_W_MASK | DESC_A_MASK);
-        env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
         env->eip = (uint32_t)env->star;
     }
 }
@@ -1014,6 +1014,9 @@ void helper_sysret(CPUX86State *env, int dflag)
     }
     selector = (env->star >> 48) & 0xffff;
     if (env->hflags & HF_LMA_MASK) {
+        cpu_load_eflags(env, (uint32_t)(env->regs[11]), TF_MASK | AC_MASK
+                        | ID_MASK | IF_MASK | IOPL_MASK | VM_MASK | RF_MASK |
+                        NT_MASK);
         if (dflag == 2) {
             cpu_x86_load_seg_cache(env, R_CS, (selector + 16) | 3,
                                    0, 0xffffffff,
@@ -1035,11 +1038,9 @@ void helper_sysret(CPUX86State *env, int dflag)
                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                                DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
                                DESC_W_MASK | DESC_A_MASK);
-        cpu_load_eflags(env, (uint32_t)(env->regs[11]), TF_MASK | AC_MASK
-                        | ID_MASK | IF_MASK | IOPL_MASK | VM_MASK | RF_MASK |
-                        NT_MASK);
         cpu_x86_set_cpl(env, 3);
     } else {
+        env->eflags |= IF_MASK;
         cpu_x86_load_seg_cache(env, R_CS, selector | 3,
                                0, 0xffffffff,
                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
@@ -1051,7 +1052,6 @@ void helper_sysret(CPUX86State *env, int dflag)
                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                                DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
                                DESC_W_MASK | DESC_A_MASK);
-        env->eflags |= IF_MASK;
         cpu_x86_set_cpl(env, 3);
     }
 }

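Patches 5-7 all follow from one dependency: cpu_x86_load_seg_cache() derives hidden CPU state from env->eflags (and, in the SMM case, cr0), so flag updates belonging to the same transition must happen before the segment loads. A toy standalone sketch of why the order matters (names made up, not QEMU code):

    #include <stdbool.h>
    #include <stdio.h>

    static bool vm86_flag;       /* stands in for env->eflags & VM_MASK */
    static bool seg_is_vm86;     /* hidden state captured at segment-load time */

    static void load_seg_cache(void)
    {
        /* Consumer snapshots the flag, like cpu_x86_load_seg_cache(). */
        seg_is_vm86 = vm86_flag;
    }

    int main(void)
    {
        /* Wrong order: the cached state keeps the stale VM flag. */
        vm86_flag = true;
        load_seg_cache();
        vm86_flag = false;
        printf("load before flag update: seg_is_vm86 = %d\n", seg_is_vm86);

        /* Order enforced by the patches: update the flags first. */
        vm86_flag = true;
        vm86_flag = false;
        load_seg_cache();
        printf("flag update before load: seg_is_vm86 = %d\n", seg_is_vm86);
        return 0;
    }
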
From 7848c8d19f8556666df25044bbd5d8b29439c368 Mon Sep 17 00:00:00 2001
From: Kevin O'Connor <kevin@koconnor.net>
Date: Tue, 29 Apr 2014 16:38:59 -0400
Subject: [PATCH 08/15] target-i386: the x86 CPL is stored in CS.selector -
 auto update hflags accordingly.

Instead of manually calling cpu_x86_set_cpl() when the CPL changes,
check for CPL changes on calls to cpu_x86_load_seg_cache(R_CS).  Every
location that called cpu_x86_set_cpl() also called
cpu_x86_load_seg_cache(R_CS), so cpu_x86_set_cpl() is no longer
required.

This fixes the SMM handler code, which was not setting/restoring the
CPL manually.

Signed-off-by: Kevin O'Connor <kevin@koconnor.net>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 bsd-user/main.c          |  2 --
 linux-user/main.c        |  2 --
 target-i386/cpu.h        | 25 ++++++++++++-------------
 target-i386/seg_helper.c | 15 ---------------
 target-i386/svm_helper.c |  4 ----
 5 files changed, 12 insertions(+), 36 deletions(-)

diff --git a/bsd-user/main.c b/bsd-user/main.c
index f81ba55af8..9f895b4ddb 100644
--- a/bsd-user/main.c
+++ b/bsd-user/main.c
@@ -1003,8 +1003,6 @@ int main(int argc, char **argv)
     cpu->opaque = ts;
 
 #if defined(TARGET_I386)
-    cpu_x86_set_cpl(env, 3);
-
     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
     env->hflags |= HF_PE_MASK;
     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
diff --git a/linux-user/main.c b/linux-user/main.c
index c38fecfdd9..882186e1a0 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -4051,8 +4051,6 @@ int main(int argc, char **argv, char **envp)
 #endif
 
 #if defined(TARGET_I386)
-    cpu_x86_set_cpl(env, 3);
-
     env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK;
     env->hflags |= HF_PE_MASK;
     if (env->features[FEAT_1_EDX] & CPUID_SSE) {
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 2a22a7d64e..cae3a1e85f 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -124,9 +124,9 @@
 #define ID_MASK                 0x00200000
 
 /* hidden flags - used internally by qemu to represent additional cpu
-   states. Only the CPL, INHIBIT_IRQ, SMM and SVMI are not
-   redundant. We avoid using the IOPL_MASK, TF_MASK, VM_MASK and AC_MASK
-   bit positions to ease oring with eflags. */
+   states. Only the INHIBIT_IRQ, SMM and SVMI are not redundant. We
+   avoid using the IOPL_MASK, TF_MASK, VM_MASK and AC_MASK bit
+   positions to ease oring with eflags. */
 /* current cpl */
 #define HF_CPL_SHIFT         0
 /* true if soft mmu is being used */
@@ -974,6 +974,7 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env,
     /* update the hidden flags */
     {
         if (seg_reg == R_CS) {
+            int cpl = selector & 3;
 #ifdef TARGET_X86_64
             if ((env->hflags & HF_LMA_MASK) && (flags & DESC_L_MASK)) {
                 /* long mode */
@@ -983,11 +984,19 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env,
 #endif
             {
                 /* legacy / compatibility case */
+                if (!(env->cr[0] & CR0_PE_MASK))
+                    cpl = 0;
+                else if (env->eflags & VM_MASK)
+                    cpl = 3;
                 new_hflags = (env->segs[R_CS].flags & DESC_B_MASK)
                     >> (DESC_B_SHIFT - HF_CS32_SHIFT);
                 env->hflags = (env->hflags & ~(HF_CS32_MASK | HF_CS64_MASK)) |
                     new_hflags;
             }
+#if HF_CPL_MASK != 3
+#error HF_CPL_MASK is hardcoded
+#endif
+            env->hflags = (env->hflags & ~HF_CPL_MASK) | cpl;
         }
         new_hflags = (env->segs[R_SS].flags & DESC_B_MASK)
             >> (DESC_B_SHIFT - HF_SS32_SHIFT);
@@ -1031,16 +1040,6 @@ int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector,
                             target_ulong *base, unsigned int *limit,
                             unsigned int *flags);
 
-/* wrapper, just in case memory mappings must be changed */
-static inline void cpu_x86_set_cpl(CPUX86State *s, int cpl)
-{
-#if HF_CPL_MASK == 3
-    s->hflags = (s->hflags & ~HF_CPL_MASK) | cpl;
-#else
-#error HF_CPL_MASK is hardcoded
-#endif
-}
-
 /* op_helper.c */
 /* used for debug or cpu save/restore */
 void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f);
diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c
index 2e0b113bd9..3cf862ee60 100644
--- a/target-i386/seg_helper.c
+++ b/target-i386/seg_helper.c
@@ -409,11 +409,7 @@ static void switch_tss(CPUX86State *env, int tss_selector,
         for (i = 0; i < 6; i++) {
             load_seg_vm(env, i, new_segs[i]);
         }
-        /* in vm86, CPL is always 3 */
-        cpu_x86_set_cpl(env, 3);
     } else {
-        /* CPL is set the RPL of CS */
-        cpu_x86_set_cpl(env, new_segs[R_CS] & 3);
         /* first just selectors as the rest may trigger exceptions */
         for (i = 0; i < 6; i++) {
             cpu_x86_load_seg_cache(env, i, new_segs[i], 0, 0, 0);
@@ -763,7 +759,6 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int,
                    get_seg_base(e1, e2),
                    get_seg_limit(e1, e2),
                    e2);
-    cpu_x86_set_cpl(env, dpl);
     env->eip = offset;
 }
 
@@ -928,7 +923,6 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int,
                    get_seg_base(e1, e2),
                    get_seg_limit(e1, e2),
                    e2);
-    cpu_x86_set_cpl(env, dpl);
     env->eip = offset;
 }
 #endif
@@ -962,7 +956,6 @@ void helper_syscall(CPUX86State *env, int next_eip_addend)
 
         env->eflags &= ~env->fmask;
         cpu_load_eflags(env, env->eflags, 0);
-        cpu_x86_set_cpl(env, 0);
         cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
                            0, 0xffffffff,
                                DESC_G_MASK | DESC_P_MASK |
@@ -983,7 +976,6 @@ void helper_syscall(CPUX86State *env, int next_eip_addend)
         env->regs[R_ECX] = (uint32_t)(env->eip + next_eip_addend);
 
         env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK);
-        cpu_x86_set_cpl(env, 0);
         cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc,
                            0, 0xffffffff,
                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
@@ -1038,7 +1030,6 @@ void helper_sysret(CPUX86State *env, int dflag)
                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                                DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
                                DESC_W_MASK | DESC_A_MASK);
-        cpu_x86_set_cpl(env, 3);
     } else {
         env->eflags |= IF_MASK;
         cpu_x86_load_seg_cache(env, R_CS, selector | 3,
@@ -1052,7 +1043,6 @@ void helper_sysret(CPUX86State *env, int dflag)
                                DESC_G_MASK | DESC_B_MASK | DESC_P_MASK |
                                DESC_S_MASK | (3 << DESC_DPL_SHIFT) |
                                DESC_W_MASK | DESC_A_MASK);
-        cpu_x86_set_cpl(env, 3);
     }
 }
 #endif
@@ -1905,7 +1895,6 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip,
                        get_seg_base(e1, e2),
                        get_seg_limit(e1, e2),
                        e2);
-        cpu_x86_set_cpl(env, dpl);
         SET_ESP(sp, sp_mask);
         env->eip = offset;
     }
@@ -2134,7 +2123,6 @@ static inline void helper_ret_protected(CPUX86State *env, int shift,
                        get_seg_base(e1, e2),
                        get_seg_limit(e1, e2),
                        e2);
-        cpu_x86_set_cpl(env, rpl);
         sp = new_esp;
 #ifdef TARGET_X86_64
         if (env->hflags & HF_CS64_MASK) {
@@ -2185,7 +2173,6 @@ static inline void helper_ret_protected(CPUX86State *env, int shift,
                     IF_MASK | IOPL_MASK | VM_MASK | NT_MASK | VIF_MASK |
                     VIP_MASK);
     load_seg_vm(env, R_CS, new_cs & 0xffff);
-    cpu_x86_set_cpl(env, 3);
     load_seg_vm(env, R_SS, new_ss & 0xffff);
     load_seg_vm(env, R_ES, new_es & 0xffff);
     load_seg_vm(env, R_DS, new_ds & 0xffff);
@@ -2238,7 +2225,6 @@ void helper_sysenter(CPUX86State *env)
         raise_exception_err(env, EXCP0D_GPF, 0);
     }
     env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK);
-    cpu_x86_set_cpl(env, 0);
 
 #ifdef TARGET_X86_64
     if (env->hflags & HF_LMA_MASK) {
@@ -2274,7 +2260,6 @@ void helper_sysexit(CPUX86State *env, int dflag)
     if (env->sysenter_cs == 0 || cpl != 0) {
         raise_exception_err(env, EXCP0D_GPF, 0);
     }
-    cpu_x86_set_cpl(env, 3);
 #ifdef TARGET_X86_64
     if (dflag == 2) {
         cpu_x86_load_seg_cache(env, R_CS, ((env->sysenter_cs + 32) & 0xfffc) |
diff --git a/target-i386/svm_helper.c b/target-i386/svm_helper.c
index 848a4b9985..846eaa5918 100644
--- a/target-i386/svm_helper.c
+++ b/target-i386/svm_helper.c
@@ -282,9 +282,6 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend)
                           env->vm_vmcb + offsetof(struct vmcb, save.dr7));
     env->dr[6] = ldq_phys(cs->as,
                           env->vm_vmcb + offsetof(struct vmcb, save.dr6));
-    cpu_x86_set_cpl(env, ldub_phys(cs->as,
-                                   env->vm_vmcb + offsetof(struct vmcb,
-                                                           save.cpl)));
 
     /* FIXME: guest state consistency checks */
 
@@ -729,7 +726,6 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1)
                           env->vm_hsave + offsetof(struct vmcb, save.dr7));
 
     /* other setups */
-    cpu_x86_set_cpl(env, 0);
     stq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, control.exit_code),
              exit_code);
     stq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1),

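The change rests on the fact that CPL can always be derived from the CS load itself: 0 in real mode, 3 in virtual-8086 mode, and the RPL bits of the selector otherwise. A standalone sketch of that derivation with simplified flags (not QEMU's types):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static int derive_cpl(uint16_t cs_selector, bool protected_mode, bool vm86)
    {
        if (!protected_mode) {
            return 0;                /* real mode runs at CPL 0 */
        }
        if (vm86) {
            return 3;                /* virtual-8086 code always runs at CPL 3 */
        }
        return cs_selector & 3;      /* otherwise CPL equals the RPL of CS */
    }

    int main(void)
    {
        printf("%d %d %d\n",
               derive_cpl(0x0008, false, false),   /* real mode     -> 0 */
               derive_cpl(0x0008, true, true),     /* vm86          -> 3 */
               derive_cpl(0x0023, true, false));   /* user code seg -> 3 */
        return 0;
    }
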
From 50a2c6e55fa2ce5a2916a2c206bad2c6b0e06df1 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 20 Mar 2013 13:11:56 +0100
Subject: [PATCH 09/15] kvm: reset state from the CPU's reset method

Now that we have a CPU object with a reset method, it is better to
keep the KVM reset close to the CPU reset.  Using qemu_register_reset
as we do now keeps them far apart.

With this patch, PPC no longer calls the kvm_arch_ function, so
it can get removed there.  Other arches call it from their CPU
reset handler, and the function gets an ARMCPU/X86CPU/S390CPU.

Note that ARM- and s390-specific functions are called kvm_arm_*
and kvm_s390_*, while x86-specific functions are called kvm_arch_*.
That follows the convention used by the different architectures.
Changing that is the topic of a separate patch.

Reviewed-by: Gleb Natapov <gnatapov@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 include/sysemu/kvm.h   |  2 --
 kvm-all.c              | 11 -----------
 target-arm/cpu.c       |  7 +++++++
 target-arm/kvm32.c     |  4 +---
 target-arm/kvm64.c     |  2 +-
 target-arm/kvm_arm.h   |  8 ++++++++
 target-i386/cpu.c      |  5 +++++
 target-i386/kvm.c      |  3 +--
 target-i386/kvm_i386.h |  1 +
 target-ppc/kvm.c       |  4 ----
 target-s390x/cpu.c     |  4 ++++
 target-s390x/cpu.h     |  5 +++++
 target-s390x/kvm.c     |  6 ++++--
 13 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h
index a6c2823352..e7ad9d159a 100644
--- a/include/sysemu/kvm.h
+++ b/include/sysemu/kvm.h
@@ -245,8 +245,6 @@ int kvm_arch_init_vcpu(CPUState *cpu);
 /* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */
 unsigned long kvm_arch_vcpu_id(CPUState *cpu);
 
-void kvm_arch_reset_vcpu(CPUState *cpu);
-
 int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr);
 int kvm_arch_on_sigbus(int code, void *addr);
 
diff --git a/kvm-all.c b/kvm-all.c
index 94520e5915..a343ede4d4 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -223,13 +223,6 @@ static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
     return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem);
 }
 
-static void kvm_reset_vcpu(void *opaque)
-{
-    CPUState *cpu = opaque;
-
-    kvm_arch_reset_vcpu(cpu);
-}
-
 int kvm_init_vcpu(CPUState *cpu)
 {
     KVMState *s = kvm_state;
@@ -269,10 +262,6 @@ int kvm_init_vcpu(CPUState *cpu)
     }
 
     ret = kvm_arch_init_vcpu(cpu);
-    if (ret == 0) {
-        qemu_register_reset(kvm_reset_vcpu, cpu);
-        kvm_arch_reset_vcpu(cpu);
-    }
 err:
     return ret;
 }
diff --git a/target-arm/cpu.c b/target-arm/cpu.c
index c0ddc3e3df..6c6f2b3d46 100644
--- a/target-arm/cpu.c
+++ b/target-arm/cpu.c
@@ -29,6 +29,7 @@
 #include "hw/arm/arm.h"
 #include "sysemu/sysemu.h"
 #include "sysemu/kvm.h"
+#include "kvm_arm.h"
 
 static void arm_cpu_set_pc(CPUState *cs, vaddr value)
 {
@@ -165,6 +166,12 @@ static void arm_cpu_reset(CPUState *s)
      * tb_flush().
      */
     tb_flush(env);
+
+#ifndef CONFIG_USER_ONLY
+    if (kvm_enabled()) {
+        kvm_arm_reset_vcpu(cpu);
+    }
+#endif
 }
 
 #ifndef CONFIG_USER_ONLY
diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c
index a690d9935f..b79750c57e 100644
--- a/target-arm/kvm32.c
+++ b/target-arm/kvm32.c
@@ -510,11 +510,9 @@ int kvm_arch_get_registers(CPUState *cs)
     return 0;
 }
 
-void kvm_arch_reset_vcpu(CPUState *cs)
+void kvm_arm_reset_vcpu(ARMCPU *cpu)
 {
     /* Feed the kernel back its initial register state */
-    ARMCPU *cpu = ARM_CPU(cs);
-
     memmove(cpu->cpreg_values, cpu->cpreg_reset_values,
             cpu->cpreg_array_len * sizeof(cpu->cpreg_values[0]));
 
diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c
index e115879d9a..c729b9ec9f 100644
--- a/target-arm/kvm64.c
+++ b/target-arm/kvm64.c
@@ -260,6 +260,6 @@ int kvm_arch_get_registers(CPUState *cs)
     return ret;
 }
 
-void kvm_arch_reset_vcpu(CPUState *cs)
+void kvm_arm_reset_vcpu(ARMCPU *cpu)
 {
 }
diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h
index 137c5671e9..dc4e2336fa 100644
--- a/target-arm/kvm_arm.h
+++ b/target-arm/kvm_arm.h
@@ -67,6 +67,14 @@ bool write_list_to_kvmstate(ARMCPU *cpu);
  */
 bool write_kvmstate_to_list(ARMCPU *cpu);
 
+/**
+ * kvm_arm_reset_vcpu:
+ * @cpu: ARMCPU
+ *
+ * Called at reset time to reset kernel registers to their initial values.
+ */
+void kvm_arm_reset_vcpu(ARMCPU *cpu);
+
 #ifdef CONFIG_KVM
 /**
  * kvm_arm_create_scratch_host_vcpu:
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 8f193a9330..c04aed929f 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -24,6 +24,7 @@
 #include "cpu.h"
 #include "sysemu/kvm.h"
 #include "sysemu/cpus.h"
+#include "kvm_i386.h"
 #include "topology.h"
 
 #include "qemu/option.h"
@@ -2494,6 +2495,10 @@ static void x86_cpu_reset(CPUState *s)
     }
 
     s->halted = !cpu_is_bsp(cpu);
+
+    if (kvm_enabled()) {
+        kvm_arch_reset_vcpu(cpu);
+    }
 #endif
 }
 
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index d17eea3186..2882e14dad 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -723,9 +723,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
     return 0;
 }
 
-void kvm_arch_reset_vcpu(CPUState *cs)
+void kvm_arch_reset_vcpu(X86CPU *cpu)
 {
-    X86CPU *cpu = X86_CPU(cs);
     CPUX86State *env = &cpu->env;
 
     env->exception_injected = -1;
diff --git a/target-i386/kvm_i386.h b/target-i386/kvm_i386.h
index 4392ab4359..b0b2193b1f 100644
--- a/target-i386/kvm_i386.h
+++ b/target-i386/kvm_i386.h
@@ -14,6 +14,7 @@
 #include "sysemu/kvm.h"
 
 bool kvm_allows_irq0_override(void);
+void kvm_arch_reset_vcpu(X86CPU *cs);
 
 int kvm_device_pci_assign(KVMState *s, PCIHostDeviceAddress *dev_addr,
                           uint32_t flags, uint32_t *dev_id);
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 4b81e5f253..8ff1777dcb 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -430,10 +430,6 @@ int kvm_arch_init_vcpu(CPUState *cs)
     return ret;
 }
 
-void kvm_arch_reset_vcpu(CPUState *cpu)
-{
-}
-
 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 {
     CPUPPCState *env = &cpu->env;
diff --git a/target-s390x/cpu.c b/target-s390x/cpu.c
index dfd83e8aef..c3082b73c5 100644
--- a/target-s390x/cpu.c
+++ b/target-s390x/cpu.c
@@ -152,6 +152,10 @@ static void s390_cpu_full_reset(CPUState *s)
      * after incrementing the cpu counter */
 #if !defined(CONFIG_USER_ONLY)
     s->halted = 1;
+
+    if (kvm_enabled()) {
+        kvm_s390_reset_vcpu(cpu);
+    }
 #endif
     tlb_flush(s, 1);
 }
diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h
index aad277af49..06454d6da5 100644
--- a/target-s390x/cpu.h
+++ b/target-s390x/cpu.h
@@ -359,11 +359,16 @@ void s390x_cpu_timer(void *opaque);
 int s390_virtio_hypercall(CPUS390XState *env);
 
 #ifdef CONFIG_KVM
+void kvm_s390_reset_vcpu(S390CPU *cpu);
 void kvm_s390_interrupt(S390CPU *cpu, int type, uint32_t code);
 void kvm_s390_virtio_irq(S390CPU *cpu, int config_change, uint64_t token);
 void kvm_s390_interrupt_internal(S390CPU *cpu, int type, uint32_t parm,
                                  uint64_t parm64, int vm);
 #else
+static inline void kvm_s390_reset_vcpu(S390CPU *cpu)
+{
+}
+
 static inline void kvm_s390_interrupt(S390CPU *cpu, int type, uint32_t code)
 {
 }
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index ba2dffe3dd..56179afece 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -117,14 +117,16 @@ int kvm_arch_init_vcpu(CPUState *cpu)
     return 0;
 }
 
-void kvm_arch_reset_vcpu(CPUState *cpu)
+void kvm_s390_reset_vcpu(S390CPU *cpu)
 {
+    CPUState *cs = CPU(cpu);
+
     /* The initial reset call is needed here to reset in-kernel
      * vcpu data that we can't access directly from QEMU
      * (i.e. with older kernels which don't support sync_regs/ONE_REG).
      * Before this ioctl cpu_synchronize_state() is called in common kvm
      * code (kvm-all) */
-    if (kvm_vcpu_ioctl(cpu, KVM_S390_INITIAL_RESET, NULL)) {
+    if (kvm_vcpu_ioctl(cs, KVM_S390_INITIAL_RESET, NULL)) {
         perror("Can't reset vcpu\n");
     }
 }

From e0723c451028102d9165e21424b4833376ce9666 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 8 Mar 2013 19:21:50 +0100
Subject: [PATCH 10/15] kvm: forward INIT signals coming from the chipset

Reviewed-by: Gleb Natapov <gnatapov@redhat.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/helper.c   |  4 ++++
 target-i386/kvm.c      | 36 +++++++++++++++++++++++++-----------
 target-i386/kvm_i386.h |  1 +
 3 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/target-i386/helper.c b/target-i386/helper.c
index 372f0e3ecb..27b35826e9 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -19,6 +19,7 @@
 
 #include "cpu.h"
 #include "sysemu/kvm.h"
+#include "kvm_i386.h"
 #ifndef CONFIG_USER_ONLY
 #include "sysemu/sysemu.h"
 #include "monitor/monitor.h"
@@ -1335,6 +1336,9 @@ void do_cpu_init(X86CPU *cpu)
     cpu_reset(cs);
     cs->interrupt_request = sipi;
     env->pat = pat;
+    if (kvm_enabled()) {
+        kvm_arch_do_init_vcpu(cpu);
+    }
     apic_init_reset(cpu->apic_state);
 }
 
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 2882e14dad..0d894ef4aa 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -30,6 +30,8 @@
 #include "qemu/config-file.h"
 #include "hw/i386/pc.h"
 #include "hw/i386/apic.h"
+#include "hw/i386/apic_internal.h"
+#include "hw/i386/apic-msidef.h"
 #include "exec/ioport.h"
 #include <asm/hyperv.h>
 #include "hw/pci/pci.h"
@@ -738,6 +740,16 @@ void kvm_arch_reset_vcpu(X86CPU *cpu)
     }
 }
 
+void kvm_arch_do_init_vcpu(X86CPU *cpu)
+{
+    CPUX86State *env = &cpu->env;
+
+    /* APs get directly into wait-for-SIPI state.  */
+    if (env->mp_state == KVM_MP_STATE_UNINITIALIZED) {
+        env->mp_state = KVM_MP_STATE_INIT_RECEIVED;
+    }
+}
+
 static int kvm_get_supported_msrs(KVMState *s)
 {
     static int kvm_supported_msrs;
@@ -2003,14 +2015,15 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run)
         }
     }
 
-    if (!kvm_irqchip_in_kernel()) {
-        /* Force the VCPU out of its inner loop to process any INIT requests
-         * or pending TPR access reports. */
-        if (cpu->interrupt_request &
-            (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
-            cpu->exit_request = 1;
-        }
+    /* Force the VCPU out of its inner loop to process any INIT requests
+     * or (for userspace APIC, but it is cheap to combine the checks here)
+     * pending TPR access reports.
+     */
+    if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) {
+        cpu->exit_request = 1;
+    }
 
+    if (!kvm_irqchip_in_kernel()) {
         /* Try to inject an interrupt if the guest can accept it */
         if (run->ready_for_interrupt_injection &&
             (cpu->interrupt_request & CPU_INTERRUPT_HARD) &&
@@ -2090,6 +2103,11 @@ int kvm_arch_process_async_events(CPUState *cs)
         }
     }
 
+    if (cs->interrupt_request & CPU_INTERRUPT_INIT) {
+        kvm_cpu_synchronize_state(cs);
+        do_cpu_init(cpu);
+    }
+
     if (kvm_irqchip_in_kernel()) {
         return 0;
     }
@@ -2103,10 +2121,6 @@ int kvm_arch_process_async_events(CPUState *cs)
         (cs->interrupt_request & CPU_INTERRUPT_NMI)) {
         cs->halted = 0;
     }
-    if (cs->interrupt_request & CPU_INTERRUPT_INIT) {
-        kvm_cpu_synchronize_state(cs);
-        do_cpu_init(cpu);
-    }
     if (cs->interrupt_request & CPU_INTERRUPT_SIPI) {
         kvm_cpu_synchronize_state(cs);
         do_cpu_sipi(cpu);
diff --git a/target-i386/kvm_i386.h b/target-i386/kvm_i386.h
index b0b2193b1f..cac30fd381 100644
--- a/target-i386/kvm_i386.h
+++ b/target-i386/kvm_i386.h
@@ -15,6 +15,7 @@
 
 bool kvm_allows_irq0_override(void);
 void kvm_arch_reset_vcpu(X86CPU *cs);
+void kvm_arch_do_init_vcpu(X86CPU *cs);
 
 int kvm_device_pci_assign(KVMState *s, PCIHostDeviceAddress *dev_addr,
                           uint32_t flags, uint32_t *dev_id);

From 05e7e819d7d159a75a46354aead95e1199b8f168 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 29 Apr 2014 13:10:05 +0200
Subject: [PATCH 11/15] target-i386: fix set of registers zeroed on reset

BND0-3, BNDCFGU, BNDCFGS, BNDSTATUS were not zeroed on reset, but they
should be (Intel Instruction Set Extensions Programming Reference
319433-015, pages 9-4 and 9-6).  Same for YMM.

XCR0 should be reset to 1.

TSC and TSC_RESET were zeroed already by the memset, remove the explicit
assignments.

Cc: Andreas Faerber <afaerber@suse.de>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/cpu.c |  3 +--
 target-i386/cpu.h | 11 ++++++-----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index c04aed929f..79b1bb92f0 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2485,8 +2485,7 @@ static void x86_cpu_reset(CPUState *s)
     cpu_breakpoint_remove_all(s, BP_CPU);
     cpu_watchpoint_remove_all(s, BP_CPU);
 
-    env->tsc_adjust = 0;
-    env->tsc = 0;
+    env->xcr0 = 1;
 
 #if !defined(CONFIG_USER_ONLY)
     /* We hard-wire the BSP to the first CPU. */
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index cae3a1e85f..827b33e696 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -797,6 +797,10 @@ typedef struct CPUX86State {
     target_ulong cr[5]; /* NOTE: cr1 is unused */
     int32_t a20_mask;
 
+    BNDReg bnd_regs[4];
+    BNDCSReg bndcs_regs;
+    uint64_t msr_bndcfgs;
+
     /* FPU state */
     unsigned int fpstt; /* top of stack index */
     uint16_t fpus;
@@ -819,6 +823,8 @@ typedef struct CPUX86State {
     XMMReg xmm_t0;
     MMXReg mmx_t0;
 
+    XMMReg ymmh_regs[CPU_NB_REGS];
+
     /* sysenter registers */
     uint32_t sysenter_cs;
     target_ulong sysenter_esp;
@@ -928,12 +934,7 @@ typedef struct CPUX86State {
     uint16_t fpus_vmstate;
     uint16_t fptag_vmstate;
     uint16_t fpregs_format_vmstate;
-
     uint64_t xstate_bv;
-    XMMReg ymmh_regs[CPU_NB_REGS];
-    BNDReg bnd_regs[4];
-    BNDCSReg bndcs_regs;
-    uint64_t msr_bndcfgs;
 
     uint64_t xcr0;
 

From 43175fa96add507afee6c0a83ec9ffe0ca130fc3 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 12 Mar 2013 13:16:28 +0100
Subject: [PATCH 12/15] target-i386: preserve FPU and MSR state on INIT

Most MSRs, plus the FPU, MMX, MXCSR, XMM and YMM registers should not
be zeroed on INIT (Table 9-1 in the Intel SDM).  Copy them out of
CPUX86State and back in, instead of special casing env->pat.

The relevant fields are already consecutive except PAT and SMBASE.
However:

- KVM and Hyper-V MSRs should be reset because they include memory
locations written by the hypervisor.  These MSRs are moved together
to just after the end of the preserved area.

- SVM state can be moved out of the way since it is written by VMRUN.

Cc: Andreas Faerber <afaerber@suse.de>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 target-i386/cpu.c    |  3 +--
 target-i386/cpu.h    | 42 ++++++++++++++++++++++++++----------------
 target-i386/helper.c | 10 ++++++++--
 3 files changed, 35 insertions(+), 20 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index 79b1bb92f0..042a48d703 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -2418,8 +2418,7 @@ static void x86_cpu_reset(CPUState *s)
 
     xcc->parent_reset(s);
 
-
-    memset(env, 0, offsetof(CPUX86State, pat));
+    memset(env, 0, offsetof(CPUX86State, cpuid_level));
 
     tlb_flush(s, 1);
 
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 827b33e696..5fd1e20b57 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -801,6 +801,9 @@ typedef struct CPUX86State {
     BNDCSReg bndcs_regs;
     uint64_t msr_bndcfgs;
 
+    /* Beginning of state preserved by INIT (dummy marker).  */
+    struct {} start_init_save;
+
     /* FPU state */
     unsigned int fpstt; /* top of stack index */
     uint16_t fpus;
@@ -833,15 +836,6 @@ typedef struct CPUX86State {
     uint64_t star;
 
     uint64_t vm_hsave;
-    uint64_t vm_vmcb;
-    uint64_t tsc_offset;
-    uint64_t intercept;
-    uint16_t intercept_cr_read;
-    uint16_t intercept_cr_write;
-    uint16_t intercept_dr_read;
-    uint16_t intercept_dr_write;
-    uint32_t intercept_exceptions;
-    uint8_t v_tpr;
 
 #ifdef TARGET_X86_64
     target_ulong lstar;
@@ -849,11 +843,6 @@ typedef struct CPUX86State {
     target_ulong fmask;
     target_ulong kernelgsbase;
 #endif
-    uint64_t system_time_msr;
-    uint64_t wall_clock_msr;
-    uint64_t steal_time_msr;
-    uint64_t async_pf_en_msr;
-    uint64_t pv_eoi_en_msr;
 
     uint64_t tsc;
     uint64_t tsc_adjust;
@@ -870,6 +859,19 @@ typedef struct CPUX86State {
     uint64_t msr_fixed_counters[MAX_FIXED_COUNTERS];
     uint64_t msr_gp_counters[MAX_GP_COUNTERS];
     uint64_t msr_gp_evtsel[MAX_GP_COUNTERS];
+
+    uint64_t pat;
+    uint32_t smbase;
+
+    /* End of state preserved by INIT (dummy marker).  */
+    struct {} end_init_save;
+
+    uint64_t system_time_msr;
+    uint64_t wall_clock_msr;
+    uint64_t steal_time_msr;
+    uint64_t async_pf_en_msr;
+    uint64_t pv_eoi_en_msr;
+
     uint64_t msr_hv_hypercall;
     uint64_t msr_hv_guest_os_id;
     uint64_t msr_hv_vapic;
@@ -884,9 +886,18 @@ typedef struct CPUX86State {
         struct CPUBreakpoint *cpu_breakpoint[4];
         struct CPUWatchpoint *cpu_watchpoint[4];
     }; /* break/watchpoints for dr[0..3] */
-    uint32_t smbase;
     int old_exception;  /* exception in flight */
 
+    uint64_t vm_vmcb;
+    uint64_t tsc_offset;
+    uint64_t intercept;
+    uint16_t intercept_cr_read;
+    uint16_t intercept_cr_write;
+    uint16_t intercept_dr_read;
+    uint16_t intercept_dr_write;
+    uint32_t intercept_exceptions;
+    uint8_t v_tpr;
+
     /* KVM states, automatically cleared on reset */
     uint8_t nmi_injected;
     uint8_t nmi_pending;
@@ -894,7 +905,6 @@ typedef struct CPUX86State {
     CPU_COMMON
 
     /* Fields from here on are preserved across CPU reset. */
-    uint64_t pat;
 
     /* processor features (e.g. for CPUID insn) */
     uint32_t cpuid_level;
diff --git a/target-i386/helper.c b/target-i386/helper.c
index 27b35826e9..46d20e4b89 100644
--- a/target-i386/helper.c
+++ b/target-i386/helper.c
@@ -1330,12 +1330,18 @@ void do_cpu_init(X86CPU *cpu)
 {
     CPUState *cs = CPU(cpu);
     CPUX86State *env = &cpu->env;
+    CPUX86State *save = g_new(CPUX86State, 1);
     int sipi = cs->interrupt_request & CPU_INTERRUPT_SIPI;
-    uint64_t pat = env->pat;
+
+    *save = *env;
 
     cpu_reset(cs);
     cs->interrupt_request = sipi;
-    env->pat = pat;
+    memcpy(&env->start_init_save, &save->start_init_save,
+           offsetof(CPUX86State, end_init_save) -
+           offsetof(CPUX86State, start_init_save));
+    g_free(save);
+
     if (kvm_enabled()) {
         kvm_arch_do_init_vcpu(cpu);
     }

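The preserved-across-INIT window is delimited by two zero-size dummy members, so one memcpy over [start_init_save, end_init_save) restores it after cpu_reset(). A standalone sketch of the same technique with a made-up structure; the struct {} markers rely on the same GNU C extension QEMU uses:

    #include <stddef.h>
    #include <stdio.h>
    #include <string.h>

    typedef struct {
        int cleared_on_init;

        struct {} start_init_save;   /* dummy marker: preserved area begins */
        int preserved_a;
        int preserved_b;
        struct {} end_init_save;     /* dummy marker: preserved area ends */

        int also_cleared_on_init;
    } State;

    static void do_init(State *s)
    {
        State save = *s;             /* snapshot everything */

        memset(s, 0, sizeof(*s));    /* "reset": zero the whole structure */
        memcpy(&s->start_init_save, &save.start_init_save,
               offsetof(State, end_init_save) -
               offsetof(State, start_init_save));
    }

    int main(void)
    {
        State s;

        memset(&s, 0, sizeof(s));
        s.cleared_on_init = 1;
        s.preserved_a = 2;
        s.preserved_b = 3;
        s.also_cleared_on_init = 4;

        do_init(&s);
        printf("%d %d %d %d\n", s.cleared_on_init, s.preserved_a,
               s.preserved_b, s.also_cleared_on_init);   /* prints: 0 2 3 0 */
        return 0;
    }
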
From 7b4d915e11ae7afb2d42a8cae90db26bc0c142b8 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 19 Mar 2013 14:17:18 +0100
Subject: [PATCH 13/15] apic: do not accept SIPI on the bootstrap processor

SIPI interrupts are ignored on the bootstrap processor.  Never accept one.

Cc: Andreas Faerber <afaerber@suse.de>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/intc/apic_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c
index 71376533ca..ce3d903b13 100644
--- a/hw/intc/apic_common.c
+++ b/hw/intc/apic_common.c
@@ -200,7 +200,7 @@ void apic_init_reset(DeviceState *dev)
     s->initial_count = 0;
     s->initial_count_load_time = 0;
     s->next_time = 0;
-    s->wait_for_sipi = 1;
+    s->wait_for_sipi = !cpu_is_bsp(s->cpu);
 
     if (s->timer) {
         timer_del(s->timer);

From 4a92a558f49cb0693e36bd6d4f9217f298045be2 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 5 Mar 2013 15:35:17 +0100
Subject: [PATCH 14/15] cpu: make CPU_INTERRUPT_RESET available on all targets

On the x86, some devices need access to the CPU reset pin (INIT#).
Provide a generic service to do this, using one of the internal
cpu_interrupt targets.  Generalize the PPC-specific code for
CPU_INTERRUPT_RESET to other targets.

Since PPC does not support migration across QEMU versions (its
machine types are not versioned yet), I picked the value that
is used on x86, CPU_INTERRUPT_TGT_INT_1.  Consequently, the names
TGT_INT_2 and TGT_INT_3 are shifted down by one (becoming TGT_INT_1
and TGT_INT_2) while keeping their values.

Reviewed-by: Anthony Liguori <aliguori@us.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 cpu-exec.c             | 23 +++++++++++++----------
 include/exec/cpu-all.h |  8 +++++---
 target-i386/cpu.h      |  7 ++++---
 target-ppc/cpu.h       |  3 ---
 4 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/cpu-exec.c b/cpu-exec.c
index 2f54054d8c..38e5f02a30 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -335,6 +335,18 @@ int cpu_exec(CPUArchState *env)
                         cpu_loop_exit(cpu);
                     }
 #endif
+#if defined(TARGET_I386)
+                    if (interrupt_request & CPU_INTERRUPT_INIT) {
+                        cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0);
+                        do_cpu_init(x86_cpu);
+                        cpu->exception_index = EXCP_HALTED;
+                        cpu_loop_exit(cpu);
+                    }
+#else
+                    if (interrupt_request & CPU_INTERRUPT_RESET) {
+                        cpu_reset(cpu);
+                    }
+#endif
 #if defined(TARGET_I386)
 #if !defined(CONFIG_USER_ONLY)
                     if (interrupt_request & CPU_INTERRUPT_POLL) {
@@ -342,13 +354,7 @@ int cpu_exec(CPUArchState *env)
                         apic_poll_irq(x86_cpu->apic_state);
                     }
 #endif
-                    if (interrupt_request & CPU_INTERRUPT_INIT) {
-                            cpu_svm_check_intercept_param(env, SVM_EXIT_INIT,
-                                                          0);
-                            do_cpu_init(x86_cpu);
-                            cpu->exception_index = EXCP_HALTED;
-                            cpu_loop_exit(cpu);
-                    } else if (interrupt_request & CPU_INTERRUPT_SIPI) {
+                    if (interrupt_request & CPU_INTERRUPT_SIPI) {
                             do_cpu_sipi(x86_cpu);
                     } else if (env->hflags2 & HF2_GIF_MASK) {
                         if ((interrupt_request & CPU_INTERRUPT_SMI) &&
@@ -405,9 +411,6 @@ int cpu_exec(CPUArchState *env)
                         }
                     }
 #elif defined(TARGET_PPC)
-                    if ((interrupt_request & CPU_INTERRUPT_RESET)) {
-                        cpu_reset(cpu);
-                    }
                     if (interrupt_request & CPU_INTERRUPT_HARD) {
                         ppc_hw_interrupt(env);
                         if (env->pending_interrupts == 0) {
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index fb649a4029..9cab592dc5 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -381,6 +381,9 @@ CPUArchState *cpu_copy(CPUArchState *env);
 /* Debug event pending.  */
 #define CPU_INTERRUPT_DEBUG       0x0080
 
+/* Reset signal.  */
+#define CPU_INTERRUPT_RESET       0x0400
+
 /* Several target-specific external hardware interrupts.  Each target/cpu.h
    should define proper names based on these defines.  */
 #define CPU_INTERRUPT_TGT_EXT_0   0x0008
@@ -395,9 +398,8 @@ CPUArchState *cpu_copy(CPUArchState *env);
    instruction being executed.  These, therefore, are not masked while
    single-stepping within the debugger.  */
 #define CPU_INTERRUPT_TGT_INT_0   0x0100
-#define CPU_INTERRUPT_TGT_INT_1   0x0400
-#define CPU_INTERRUPT_TGT_INT_2   0x0800
-#define CPU_INTERRUPT_TGT_INT_3   0x2000
+#define CPU_INTERRUPT_TGT_INT_1   0x0800
+#define CPU_INTERRUPT_TGT_INT_2   0x2000
 
 /* First unused bit: 0x4000.  */
 
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 5fd1e20b57..e9cbdabc03 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -606,10 +606,11 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
 #define CPU_INTERRUPT_NMI       CPU_INTERRUPT_TGT_EXT_3
 #define CPU_INTERRUPT_MCE       CPU_INTERRUPT_TGT_EXT_4
 #define CPU_INTERRUPT_VIRQ      CPU_INTERRUPT_TGT_INT_0
-#define CPU_INTERRUPT_INIT      CPU_INTERRUPT_TGT_INT_1
-#define CPU_INTERRUPT_SIPI      CPU_INTERRUPT_TGT_INT_2
-#define CPU_INTERRUPT_TPR       CPU_INTERRUPT_TGT_INT_3
+#define CPU_INTERRUPT_SIPI      CPU_INTERRUPT_TGT_INT_1
+#define CPU_INTERRUPT_TPR       CPU_INTERRUPT_TGT_INT_2
 
+/* Use a clearer name for this.  */
+#define CPU_INTERRUPT_INIT      CPU_INTERRUPT_RESET
 
 typedef enum {
     CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index d4983405a2..75ed5fa636 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -2042,9 +2042,6 @@ enum {
     PPC_INTERRUPT_PERFM,          /* Performance monitor interrupt        */
 };
 
-/* CPU should be reset next, restart from scratch afterwards */
-#define CPU_INTERRUPT_RESET       CPU_INTERRUPT_TGT_INT_0
-
 /*****************************************************************************/
 
 static inline target_ulong cpu_read_xer(CPUPPCState *env)

From 4700a316df7d2cdcd256dcd64a10cec643f4dfa1 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Tue, 5 Mar 2013 15:04:36 +0100
Subject: [PATCH 15/15] pc: port 92 reset requires a low->high transition

The PIIX datasheet says that "before another INIT pulse can be
generated via [port 92h], [bit 0] must be written back to a
zero".

This bug is masked right now because a full reset will clear the
value of port 92h.  But once we implement soft reset correctly,
the next attempt to enable the A20 line by setting bit 1 (and
leaving the others untouched) will cause another reset.

Reviewed-by: Anthony Liguori <aliguori@us.ibm.com>
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 hw/i386/pc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/hw/i386/pc.c b/hw/i386/pc.c
index 07de2384ad..e6369d5be6 100644
--- a/hw/i386/pc.c
+++ b/hw/i386/pc.c
@@ -471,11 +471,12 @@ static void port92_write(void *opaque, hwaddr addr, uint64_t val,
                          unsigned size)
 {
     Port92State *s = opaque;
+    int oldval = s->outport;
 
     DPRINTF("port92: write 0x%02x\n", val);
     s->outport = val;
     qemu_set_irq(*s->a20_out, (val >> 1) & 1);
-    if (val & 1) {
+    if ((val & 1) && !(oldval & 1)) {
         qemu_system_reset_request();
     }
 }
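
To round this off, a standalone sketch of the low-to-high edge detection the patch adds: a reset is requested only on a 0->1 transition of bit 0, so rewriting the register with bit 0 still set (for instance to toggle the A20 bit) no longer triggers a second reset. The values below are made up for the demo:

    #include <stdint.h>
    #include <stdio.h>

    static uint8_t outport;          /* last value written to port 92h */

    static void port92_write(uint8_t val)
    {
        uint8_t oldval = outport;

        outport = val;
        if ((val & 1) && !(oldval & 1)) {
            printf("reset requested (old 0x%02x, new 0x%02x)\n",
                   (unsigned)oldval, (unsigned)val);
        }
    }

    int main(void)
    {
        port92_write(0x01);          /* bit 0: 0 -> 1, reset */
        port92_write(0x03);          /* bit 0 already set: A20 change only */
        port92_write(0x00);          /* bit 0 cleared again */
        port92_write(0x01);          /* another 0 -> 1 transition, reset */
        return 0;
    }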