diff --git a/Makefile.objs b/Makefile.objs
index adb584234e..b21f9d3894 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -141,7 +141,7 @@ common-obj-y += $(addprefix ui/, $(ui-obj-y))
 
 common-obj-y += iov.o acl.o
 common-obj-$(CONFIG_THREAD) += qemu-thread.o
-common-obj-$(CONFIG_IOTHREAD) += compatfd.o
+common-obj-$(CONFIG_POSIX) += compatfd.o
 common-obj-y += notify.o event_notifier.o
 common-obj-y += qemu-timer.o qemu-timer-common.o
 
diff --git a/Makefile.target b/Makefile.target
index a6c30ddc23..5a0fd40696 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -37,7 +37,7 @@ ifndef CONFIG_HAIKU
 LIBS+=-lm
 endif
 
-kvm.o kvm-all.o vhost.o vhost_net.o: QEMU_CFLAGS+=$(KVM_CFLAGS)
+kvm.o kvm-all.o vhost.o vhost_net.o kvmclock.o: QEMU_CFLAGS+=$(KVM_CFLAGS)
 
 config-target.h: config-target.h-timestamp
 config-target.h-timestamp: config-target.mak
@@ -218,7 +218,7 @@ obj-i386-y += cirrus_vga.o apic.o ioapic.o piix_pci.o
 obj-i386-y += vmport.o applesmc.o
 obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o
 obj-i386-y += debugcon.o multiboot.o
-obj-i386-y += pc_piix.o
+obj-i386-y += pc_piix.o kvmclock.o
 obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
 
 # shared objects
diff --git a/configure b/configure
index 598e8e1ebb..a3f53456f4 100755
--- a/configure
+++ b/configure
@@ -2057,6 +2057,12 @@ EOF
 
 if compile_prog "" "" ; then
   signalfd=yes
+elif test "$kvm" = "yes" -a "$io_thread" != "yes"; then
+  echo
+  echo "ERROR: Host kernel lacks signalfd() support,"
+  echo "but KVM depends on it when the IO thread is disabled."
+  echo
+  exit 1
 fi
 
 # check if eventfd is supported
diff --git a/cpu-all.h b/cpu-all.h
index ffbd6a4df8..87b0f86667 100644
--- a/cpu-all.h
+++ b/cpu-all.h
@@ -959,6 +959,12 @@ int cpu_physical_memory_get_dirty_tracking(void);
 int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
                                    target_phys_addr_t end_addr);
 
+int cpu_physical_log_start(target_phys_addr_t start_addr,
+                           ram_addr_t size);
+
+int cpu_physical_log_stop(target_phys_addr_t start_addr,
+                          ram_addr_t size);
+
 void dump_exec_info(FILE *f, fprintf_function cpu_fprintf);
 #endif /* !CONFIG_USER_ONLY */
 
diff --git a/cpu-common.h b/cpu-common.h
index 6d4a898ad1..54d21d4717 100644
--- a/cpu-common.h
+++ b/cpu-common.h
@@ -96,6 +96,10 @@ struct CPUPhysMemoryClient {
                              target_phys_addr_t end_addr);
     int (*migration_log)(struct CPUPhysMemoryClient *client,
                          int enable);
+    int (*log_start)(struct CPUPhysMemoryClient *client,
+                     target_phys_addr_t phys_addr, ram_addr_t size);
+    int (*log_stop)(struct CPUPhysMemoryClient *client,
+                    target_phys_addr_t phys_addr, ram_addr_t size);
     QLIST_ENTRY(CPUPhysMemoryClient) list;
 };
 
diff --git a/cpu-defs.h b/cpu-defs.h
index 8d4bf86c53..db809ed465 100644
--- a/cpu-defs.h
+++ b/cpu-defs.h
@@ -205,6 +205,7 @@ typedef struct CPUWatchpoint {
     uint32_t stopped; /* Artificially stopped */                        \
     struct QemuThread *thread;                                          \
     struct QemuCond *halt_cond;                                         \
+    int thread_kicked;                                                  \
     struct qemu_work_item *queued_work_first, *queued_work_last;        \
     const char *cpu_model_str;                                          \
     struct KVMState *kvm_state;                                         \
diff --git a/cpu-exec.c b/cpu-exec.c
index 8c9fb8b1a2..b03b3a749a 100644
--- a/cpu-exec.c
+++ b/cpu-exec.c
@@ -196,28 +196,6 @@ static inline TranslationBlock *tb_find_fast(void)
     return tb;
 }
 
-static CPUDebugExcpHandler *debug_excp_handler;
-
-CPUDebugExcpHandler *cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler)
-{
-    CPUDebugExcpHandler *old_handler = debug_excp_handler;
-
-    debug_excp_handler = handler;
-    return old_handler;
-}
-
-static void cpu_handle_debug_exception(CPUState *env)
-{
-    CPUWatchpoint *wp;
-
-    if (!env->watchpoint_hit)
-        QTAILQ_FOREACH(wp, &env->watchpoints, entry)
-            wp->flags &= ~BP_WATCHPOINT_HIT;
-
-    if (debug_excp_handler)
-        debug_excp_handler(env);
-}
-
 /* main execution loop */
 
 volatile sig_atomic_t exit_request;
@@ -248,13 +226,11 @@ int cpu_exec(CPUState *env1)
     }
 
 #if defined(TARGET_I386)
-    if (!kvm_enabled()) {
-        /* put eflags in CPU temporary format */
-        CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
-        DF = 1 - (2 * ((env->eflags >> 10) & 1));
-        CC_OP = CC_OP_EFLAGS;
-        env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
-    }
+    /* put eflags in CPU temporary format */
+    CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
+    DF = 1 - (2 * ((env->eflags >> 10) & 1));
+    CC_OP = CC_OP_EFLAGS;
+    env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C);
 #elif defined(TARGET_SPARC)
 #elif defined(TARGET_M68K)
     env->cc_op = CC_OP_FLAGS;
@@ -279,7 +255,7 @@ int cpu_exec(CPUState *env1)
         if (setjmp(env->jmp_env) == 0) {
 #if defined(__sparc__) && !defined(CONFIG_SOLARIS)
 #undef env
-                    env = cpu_single_env;
+            env = cpu_single_env;
 #define env cpu_single_env
 #endif
             /* if an exception is pending, we execute it here */
@@ -287,8 +263,6 @@ int cpu_exec(CPUState *env1)
                 if (env->exception_index >= EXCP_INTERRUPT) {
                     /* exit request from the cpu execution loop */
                     ret = env->exception_index;
-                    if (ret == EXCP_DEBUG)
-                        cpu_handle_debug_exception(env);
                     break;
                 } else {
 #if defined(CONFIG_USER_ONLY)
@@ -340,11 +314,6 @@ int cpu_exec(CPUState *env1)
                 }
             }
 
-            if (kvm_enabled()) {
-                kvm_cpu_exec(env);
-                longjmp(env->jmp_env, 1);
-            }
-
             next_tb = 0; /* force lookup of first TB */
             for(;;) {
                 interrupt_request = env->interrupt_request;
diff --git a/cpus.c b/cpus.c
index 2d920907e0..0f339459a5 100644
--- a/cpus.c
+++ b/cpus.c
@@ -34,9 +34,6 @@
 
 #include "cpus.h"
 #include "compatfd.h"
-#ifdef CONFIG_LINUX
-#include <sys/prctl.h>
-#endif
 
 #ifdef SIGRTMIN
 #define SIG_IPI (SIGRTMIN+4)
@@ -44,10 +41,24 @@
 #define SIG_IPI SIGUSR1
 #endif
 
+#ifdef CONFIG_LINUX
+
+#include <sys/prctl.h>
+
 #ifndef PR_MCE_KILL
 #define PR_MCE_KILL 33
 #endif
 
+#ifndef PR_MCE_KILL_SET
+#define PR_MCE_KILL_SET 1
+#endif
+
+#ifndef PR_MCE_KILL_EARLY
+#define PR_MCE_KILL_EARLY 1
+#endif
+
+#endif /* CONFIG_LINUX */
+
 static CPUState *next_cpu;
 
 /***********************************************************/
@@ -119,45 +130,117 @@ static void do_vm_stop(int reason)
 
 static int cpu_can_run(CPUState *env)
 {
-    if (env->stop)
+    if (env->stop) {
         return 0;
-    if (env->stopped || !vm_running)
+    }
+    if (env->stopped || !vm_running) {
         return 0;
+    }
     return 1;
 }
 
-static int cpu_has_work(CPUState *env)
+static bool cpu_thread_is_idle(CPUState *env)
 {
-    if (env->stop)
-        return 1;
-    if (env->queued_work_first)
-        return 1;
-    if (env->stopped || !vm_running)
-        return 0;
-    if (!env->halted)
-        return 1;
-    if (qemu_cpu_has_work(env))
-        return 1;
-    return 0;
+    if (env->stop || env->queued_work_first) {
+        return false;
+    }
+    if (env->stopped || !vm_running) {
+        return true;
+    }
+    if (!env->halted || qemu_cpu_has_work(env)) {
+        return false;
+    }
+    return true;
 }
 
-static int any_cpu_has_work(void)
+static bool all_cpu_threads_idle(void)
 {
     CPUState *env;
 
-    for (env = first_cpu; env != NULL; env = env->next_cpu)
-        if (cpu_has_work(env))
-            return 1;
-    return 0;
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
+        if (!cpu_thread_is_idle(env)) {
+            return false;
+        }
+    }
+    return true;
 }
 
-static void cpu_debug_handler(CPUState *env)
+static CPUDebugExcpHandler *debug_excp_handler;
+
+CPUDebugExcpHandler *cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler)
 {
-    gdb_set_stop_cpu(env);
-    debug_requested = EXCP_DEBUG;
-    vm_stop(EXCP_DEBUG);
+    CPUDebugExcpHandler *old_handler = debug_excp_handler;
+
+    debug_excp_handler = handler;
+    return old_handler;
 }
 
+static void cpu_handle_debug_exception(CPUState *env)
+{
+    CPUWatchpoint *wp;
+
+    if (!env->watchpoint_hit) {
+        QTAILQ_FOREACH(wp, &env->watchpoints, entry) {
+            wp->flags &= ~BP_WATCHPOINT_HIT;
+        }
+    }
+    if (debug_excp_handler) {
+        debug_excp_handler(env);
+    }
+
+    gdb_set_stop_cpu(env);
+    qemu_system_debug_request();
+#ifdef CONFIG_IOTHREAD
+    env->stopped = 1;
+#endif
+}
+
+#ifdef CONFIG_LINUX
+static void sigbus_reraise(void)
+{
+    sigset_t set;
+    struct sigaction action;
+
+    memset(&action, 0, sizeof(action));
+    action.sa_handler = SIG_DFL;
+    if (!sigaction(SIGBUS, &action, NULL)) {
+        raise(SIGBUS);
+        sigemptyset(&set);
+        sigaddset(&set, SIGBUS);
+        sigprocmask(SIG_UNBLOCK, &set, NULL);
+    }
+    perror("Failed to re-raise SIGBUS!\n");
+    abort();
+}
+
+static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
+                           void *ctx)
+{
+    if (kvm_on_sigbus(siginfo->ssi_code,
+                      (void *)(intptr_t)siginfo->ssi_addr)) {
+        sigbus_reraise();
+    }
+}
+
+static void qemu_init_sigbus(void)
+{
+    struct sigaction action;
+
+    memset(&action, 0, sizeof(action));
+    action.sa_flags = SA_SIGINFO;
+    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
+    sigaction(SIGBUS, &action, NULL);
+
+    prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0);
+}
+
+#else /* !CONFIG_LINUX */
+
+static void qemu_init_sigbus(void)
+{
+}
+#endif /* !CONFIG_LINUX */
+
 #ifndef _WIN32
 static int io_thread_fd = -1;
 
@@ -167,9 +250,9 @@ static void qemu_event_increment(void)
     static const uint64_t val = 1;
     ssize_t ret;
 
-    if (io_thread_fd == -1)
+    if (io_thread_fd == -1) {
         return;
-
+    }
     do {
         ret = write(io_thread_fd, &val, sizeof(val));
     } while (ret < 0 && errno == EINTR);
@@ -200,17 +283,17 @@ static int qemu_event_init(void)
     int fds[2];
 
     err = qemu_eventfd(fds);
-    if (err == -1)
+    if (err == -1) {
         return -errno;
-
+    }
     err = fcntl_setfl(fds[0], O_NONBLOCK);
-    if (err < 0)
+    if (err < 0) {
         goto fail;
-
+    }
     err = fcntl_setfl(fds[1], O_NONBLOCK);
-    if (err < 0)
+    if (err < 0) {
         goto fail;
-
+    }
     qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL,
                          (void *)(unsigned long)fds[0]);
 
@@ -222,125 +305,11 @@ fail:
     close(fds[1]);
     return err;
 }
-#else
-HANDLE qemu_event_handle;
 
-static void dummy_event_handler(void *opaque)
+static void dummy_signal(int sig)
 {
 }
 
-static int qemu_event_init(void)
-{
-    qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
-    if (!qemu_event_handle) {
-        fprintf(stderr, "Failed CreateEvent: %ld\n", GetLastError());
-        return -1;
-    }
-    qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL);
-    return 0;
-}
-
-static void qemu_event_increment(void)
-{
-    if (!SetEvent(qemu_event_handle)) {
-        fprintf(stderr, "qemu_event_increment: SetEvent failed: %ld\n",
-                GetLastError());
-        exit (1);
-    }
-}
-#endif
-
-#ifndef CONFIG_IOTHREAD
-int qemu_init_main_loop(void)
-{
-    cpu_set_debug_excp_handler(cpu_debug_handler);
-
-    return qemu_event_init();
-}
-
-void qemu_main_loop_start(void)
-{
-}
-
-void qemu_init_vcpu(void *_env)
-{
-    CPUState *env = _env;
-
-    env->nr_cores = smp_cores;
-    env->nr_threads = smp_threads;
-    if (kvm_enabled())
-        kvm_init_vcpu(env);
-    return;
-}
-
-int qemu_cpu_self(void *env)
-{
-    return 1;
-}
-
-void run_on_cpu(CPUState *env, void (*func)(void *data), void *data)
-{
-    func(data);
-}
-
-void resume_all_vcpus(void)
-{
-}
-
-void pause_all_vcpus(void)
-{
-}
-
-void qemu_cpu_kick(void *env)
-{
-    return;
-}
-
-void qemu_notify_event(void)
-{
-    CPUState *env = cpu_single_env;
-
-    qemu_event_increment ();
-    if (env) {
-        cpu_exit(env);
-    }
-    if (next_cpu && env != next_cpu) {
-        cpu_exit(next_cpu);
-    }
-}
-
-void qemu_mutex_lock_iothread(void) {}
-void qemu_mutex_unlock_iothread(void) {}
-
-void vm_stop(int reason)
-{
-    do_vm_stop(reason);
-}
-
-#else /* CONFIG_IOTHREAD */
-
-#include "qemu-thread.h"
-
-QemuMutex qemu_global_mutex;
-static QemuMutex qemu_fair_mutex;
-
-static QemuThread io_thread;
-
-static QemuThread *tcg_cpu_thread;
-static QemuCond *tcg_halt_cond;
-
-static int qemu_system_ready;
-/* cpu creation */
-static QemuCond qemu_cpu_cond;
-/* system init */
-static QemuCond qemu_system_cond;
-static QemuCond qemu_pause_cond;
-static QemuCond qemu_work_cond;
-
-static void tcg_init_ipi(void);
-static void kvm_init_ipi(CPUState *env);
-static sigset_t block_io_signals(void);
-
 /* If we have signalfd, we mask out the signals we want to handle and then
  * use signalfd to listen for them.  We rely on whatever the current signal
  * handler is to dispatch the signals when we receive them.
@@ -394,23 +363,333 @@ static int qemu_signalfd_init(sigset_t mask)
     return 0;
 }
 
+static void qemu_kvm_eat_signals(CPUState *env)
+{
+    struct timespec ts = { 0, 0 };
+    siginfo_t siginfo;
+    sigset_t waitset;
+    sigset_t chkset;
+    int r;
+
+    sigemptyset(&waitset);
+    sigaddset(&waitset, SIG_IPI);
+    sigaddset(&waitset, SIGBUS);
+
+    do {
+        r = sigtimedwait(&waitset, &siginfo, &ts);
+        if (r == -1 && !(errno == EAGAIN || errno == EINTR)) {
+            perror("sigtimedwait");
+            exit(1);
+        }
+
+        switch (r) {
+        case SIGBUS:
+            if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) {
+                sigbus_reraise();
+            }
+            break;
+        default:
+            break;
+        }
+
+        r = sigpending(&chkset);
+        if (r == -1) {
+            perror("sigpending");
+            exit(1);
+        }
+    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
+
+#ifndef CONFIG_IOTHREAD
+    if (sigismember(&chkset, SIGIO) || sigismember(&chkset, SIGALRM)) {
+        qemu_notify_event();
+    }
+#endif
+}
+
+#else /* _WIN32 */
+
+HANDLE qemu_event_handle;
+
+static void dummy_event_handler(void *opaque)
+{
+}
+
+static int qemu_event_init(void)
+{
+    qemu_event_handle = CreateEvent(NULL, FALSE, FALSE, NULL);
+    if (!qemu_event_handle) {
+        fprintf(stderr, "Failed CreateEvent: %ld\n", GetLastError());
+        return -1;
+    }
+    qemu_add_wait_object(qemu_event_handle, dummy_event_handler, NULL);
+    return 0;
+}
+
+static void qemu_event_increment(void)
+{
+    if (!SetEvent(qemu_event_handle)) {
+        fprintf(stderr, "qemu_event_increment: SetEvent failed: %ld\n",
+                GetLastError());
+        exit (1);
+    }
+}
+
+static void qemu_kvm_eat_signals(CPUState *env)
+{
+}
+#endif /* _WIN32 */
+
+#ifndef CONFIG_IOTHREAD
+static void qemu_kvm_init_cpu_signals(CPUState *env)
+{
+#ifndef _WIN32
+    int r;
+    sigset_t set;
+    struct sigaction sigact;
+
+    memset(&sigact, 0, sizeof(sigact));
+    sigact.sa_handler = dummy_signal;
+    sigaction(SIG_IPI, &sigact, NULL);
+
+    sigemptyset(&set);
+    sigaddset(&set, SIG_IPI);
+    sigaddset(&set, SIGIO);
+    sigaddset(&set, SIGALRM);
+    pthread_sigmask(SIG_BLOCK, &set, NULL);
+
+    pthread_sigmask(SIG_BLOCK, NULL, &set);
+    sigdelset(&set, SIG_IPI);
+    sigdelset(&set, SIGBUS);
+    sigdelset(&set, SIGIO);
+    sigdelset(&set, SIGALRM);
+    r = kvm_set_signal_mask(env, &set);
+    if (r) {
+        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
+        exit(1);
+    }
+#endif
+}
+
+#ifndef _WIN32
+static sigset_t block_synchronous_signals(void)
+{
+    sigset_t set;
+
+    sigemptyset(&set);
+    sigaddset(&set, SIGBUS);
+    if (kvm_enabled()) {
+        /*
+         * We need to process timer signals synchronously to avoid a race
+         * between exit_request check and KVM vcpu entry.
+         */
+        sigaddset(&set, SIGIO);
+        sigaddset(&set, SIGALRM);
+    }
+
+    return set;
+}
+#endif
+
+int qemu_init_main_loop(void)
+{
+#ifndef _WIN32
+    sigset_t blocked_signals;
+    int ret;
+
+    blocked_signals = block_synchronous_signals();
+
+    ret = qemu_signalfd_init(blocked_signals);
+    if (ret) {
+        return ret;
+    }
+#endif
+
+    qemu_init_sigbus();
+
+    return qemu_event_init();
+}
+
+void qemu_main_loop_start(void)
+{
+}
+
+void qemu_init_vcpu(void *_env)
+{
+    CPUState *env = _env;
+    int r;
+
+    env->nr_cores = smp_cores;
+    env->nr_threads = smp_threads;
+
+    if (kvm_enabled()) {
+        r = kvm_init_vcpu(env);
+        if (r < 0) {
+            fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
+            exit(1);
+        }
+        qemu_kvm_init_cpu_signals(env);
+    }
+}
+
+int qemu_cpu_self(void *env)
+{
+    return 1;
+}
+
+void run_on_cpu(CPUState *env, void (*func)(void *data), void *data)
+{
+    func(data);
+}
+
+void resume_all_vcpus(void)
+{
+}
+
+void pause_all_vcpus(void)
+{
+}
+
+void qemu_cpu_kick(void *env)
+{
+}
+
+void qemu_cpu_kick_self(void)
+{
+#ifndef _WIN32
+    assert(cpu_single_env);
+
+    raise(SIG_IPI);
+#else
+    abort();
+#endif
+}
+
+void qemu_notify_event(void)
+{
+    CPUState *env = cpu_single_env;
+
+    qemu_event_increment ();
+    if (env) {
+        cpu_exit(env);
+    }
+    if (next_cpu && env != next_cpu) {
+        cpu_exit(next_cpu);
+    }
+    exit_request = 1;
+}
+
+void qemu_mutex_lock_iothread(void) {}
+void qemu_mutex_unlock_iothread(void) {}
+
+void cpu_stop_current(void)
+{
+}
+
+void vm_stop(int reason)
+{
+    do_vm_stop(reason);
+}
+
+#else /* CONFIG_IOTHREAD */
+
+#include "qemu-thread.h"
+
+QemuMutex qemu_global_mutex;
+static QemuMutex qemu_fair_mutex;
+
+static QemuThread io_thread;
+
+static QemuThread *tcg_cpu_thread;
+static QemuCond *tcg_halt_cond;
+
+static int qemu_system_ready;
+/* cpu creation */
+static QemuCond qemu_cpu_cond;
+/* system init */
+static QemuCond qemu_system_cond;
+static QemuCond qemu_pause_cond;
+static QemuCond qemu_work_cond;
+
+static void cpu_signal(int sig)
+{
+    if (cpu_single_env) {
+        cpu_exit(cpu_single_env);
+    }
+    exit_request = 1;
+}
+
+static void qemu_kvm_init_cpu_signals(CPUState *env)
+{
+    int r;
+    sigset_t set;
+    struct sigaction sigact;
+
+    memset(&sigact, 0, sizeof(sigact));
+    sigact.sa_handler = dummy_signal;
+    sigaction(SIG_IPI, &sigact, NULL);
+
+    pthread_sigmask(SIG_BLOCK, NULL, &set);
+    sigdelset(&set, SIG_IPI);
+    sigdelset(&set, SIGBUS);
+    r = kvm_set_signal_mask(env, &set);
+    if (r) {
+        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r));
+        exit(1);
+    }
+}
+
+static void qemu_tcg_init_cpu_signals(void)
+{
+    sigset_t set;
+    struct sigaction sigact;
+
+    memset(&sigact, 0, sizeof(sigact));
+    sigact.sa_handler = cpu_signal;
+    sigaction(SIG_IPI, &sigact, NULL);
+
+    sigemptyset(&set);
+    sigaddset(&set, SIG_IPI);
+    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
+}
+
+static sigset_t block_io_signals(void)
+{
+    sigset_t set;
+
+    /* SIGUSR2 used by posix-aio-compat.c */
+    sigemptyset(&set);
+    sigaddset(&set, SIGUSR2);
+    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
+
+    sigemptyset(&set);
+    sigaddset(&set, SIGIO);
+    sigaddset(&set, SIGALRM);
+    sigaddset(&set, SIG_IPI);
+    sigaddset(&set, SIGBUS);
+    pthread_sigmask(SIG_BLOCK, &set, NULL);
+
+    return set;
+}
+
 int qemu_init_main_loop(void)
 {
     int ret;
     sigset_t blocked_signals;
 
-    cpu_set_debug_excp_handler(cpu_debug_handler);
+    qemu_init_sigbus();
 
     blocked_signals = block_io_signals();
 
     ret = qemu_signalfd_init(blocked_signals);
-    if (ret)
+    if (ret) {
         return ret;
+    }
 
     /* Note eventfd must be drained before signalfd handlers run */
     ret = qemu_event_init();
-    if (ret)
+    if (ret) {
         return ret;
+    }
 
     qemu_cond_init(&qemu_cpu_cond);
     qemu_cond_init(&qemu_system_cond);
@@ -442,10 +721,11 @@ void run_on_cpu(CPUState *env, void (*func)(void *data), void *data)
 
     wi.func = func;
     wi.data = data;
-    if (!env->queued_work_first)
+    if (!env->queued_work_first) {
         env->queued_work_first = &wi;
-    else
+    } else {
         env->queued_work_last->next = &wi;
+    }
     env->queued_work_last = &wi;
     wi.next = NULL;
     wi.done = false;
@@ -463,8 +743,9 @@ static void flush_queued_work(CPUState *env)
 {
     struct qemu_work_item *wi;
 
-    if (!env->queued_work_first)
+    if (!env->queued_work_first) {
         return;
+    }
 
     while ((wi = env->queued_work_first)) {
         env->queued_work_first = wi->next;
@@ -483,14 +764,16 @@ static void qemu_wait_io_event_common(CPUState *env)
         qemu_cond_signal(&qemu_pause_cond);
     }
     flush_queued_work(env);
+    env->thread_kicked = false;
 }
 
 static void qemu_tcg_wait_io_event(void)
 {
     CPUState *env;
 
-    while (!any_cpu_has_work())
+    while (all_cpu_threads_idle()) {
         qemu_cond_timedwait(tcg_halt_cond, &qemu_global_mutex, 1000);
+    }
 
     qemu_mutex_unlock(&qemu_global_mutex);
 
@@ -509,134 +792,72 @@ static void qemu_tcg_wait_io_event(void)
     }
 }
 
-static void sigbus_reraise(void)
-{
-    sigset_t set;
-    struct sigaction action;
-
-    memset(&action, 0, sizeof(action));
-    action.sa_handler = SIG_DFL;
-    if (!sigaction(SIGBUS, &action, NULL)) {
-        raise(SIGBUS);
-        sigemptyset(&set);
-        sigaddset(&set, SIGBUS);
-        sigprocmask(SIG_UNBLOCK, &set, NULL);
-    }
-    perror("Failed to re-raise SIGBUS!\n");
-    abort();
-}
-
-static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo,
-                           void *ctx)
-{
-#if defined(TARGET_I386)
-    if (kvm_on_sigbus(siginfo->ssi_code, (void *)(intptr_t)siginfo->ssi_addr))
-#endif
-        sigbus_reraise();
-}
-
-static void qemu_kvm_eat_signal(CPUState *env, int timeout)
-{
-    struct timespec ts;
-    int r, e;
-    siginfo_t siginfo;
-    sigset_t waitset;
-    sigset_t chkset;
-
-    ts.tv_sec = timeout / 1000;
-    ts.tv_nsec = (timeout % 1000) * 1000000;
-
-    sigemptyset(&waitset);
-    sigaddset(&waitset, SIG_IPI);
-    sigaddset(&waitset, SIGBUS);
-
-    do {
-        qemu_mutex_unlock(&qemu_global_mutex);
-
-        r = sigtimedwait(&waitset, &siginfo, &ts);
-        e = errno;
-
-        qemu_mutex_lock(&qemu_global_mutex);
-
-        if (r == -1 && !(e == EAGAIN || e == EINTR)) {
-            fprintf(stderr, "sigtimedwait: %s\n", strerror(e));
-            exit(1);
-        }
-
-        switch (r) {
-        case SIGBUS:
-#ifdef TARGET_I386
-            if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr))
-#endif
-                sigbus_reraise();
-            break;
-        default:
-            break;
-        }
-
-        r = sigpending(&chkset);
-        if (r == -1) {
-            fprintf(stderr, "sigpending: %s\n", strerror(e));
-            exit(1);
-        }
-    } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS));
-}
-
 static void qemu_kvm_wait_io_event(CPUState *env)
 {
-    while (!cpu_has_work(env))
+    while (cpu_thread_is_idle(env)) {
         qemu_cond_timedwait(env->halt_cond, &qemu_global_mutex, 1000);
+    }
 
-    qemu_kvm_eat_signal(env, 0);
+    qemu_kvm_eat_signals(env);
     qemu_wait_io_event_common(env);
 }
 
-static int qemu_cpu_exec(CPUState *env);
-
-static void *kvm_cpu_thread_fn(void *arg)
+static void *qemu_kvm_cpu_thread_fn(void *arg)
 {
     CPUState *env = arg;
+    int r;
 
     qemu_mutex_lock(&qemu_global_mutex);
     qemu_thread_self(env->thread);
-    if (kvm_enabled())
-        kvm_init_vcpu(env);
 
-    kvm_init_ipi(env);
+    r = kvm_init_vcpu(env);
+    if (r < 0) {
+        fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r));
+        exit(1);
+    }
+
+    qemu_kvm_init_cpu_signals(env);
 
     /* signal CPU creation */
     env->created = 1;
     qemu_cond_signal(&qemu_cpu_cond);
 
     /* and wait for machine initialization */
-    while (!qemu_system_ready)
+    while (!qemu_system_ready) {
         qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100);
+    }
 
     while (1) {
-        if (cpu_can_run(env))
-            qemu_cpu_exec(env);
+        if (cpu_can_run(env)) {
+            r = kvm_cpu_exec(env);
+            if (r == EXCP_DEBUG) {
+                cpu_handle_debug_exception(env);
+            }
+        }
         qemu_kvm_wait_io_event(env);
     }
 
     return NULL;
 }
 
-static void *tcg_cpu_thread_fn(void *arg)
+static void *qemu_tcg_cpu_thread_fn(void *arg)
 {
     CPUState *env = arg;
 
-    tcg_init_ipi();
+    qemu_tcg_init_cpu_signals();
     qemu_thread_self(env->thread);
 
     /* signal CPU creation */
     qemu_mutex_lock(&qemu_global_mutex);
-    for (env = first_cpu; env != NULL; env = env->next_cpu)
+    for (env = first_cpu; env != NULL; env = env->next_cpu) {
         env->created = 1;
+    }
     qemu_cond_signal(&qemu_cpu_cond);
 
     /* and wait for machine initialization */
-    while (!qemu_system_ready)
+    while (!qemu_system_ready) {
         qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100);
+    }
 
     while (1) {
         cpu_exec_all();
@@ -649,8 +870,22 @@ static void *tcg_cpu_thread_fn(void *arg)
 void qemu_cpu_kick(void *_env)
 {
     CPUState *env = _env;
+
     qemu_cond_broadcast(env->halt_cond);
-    qemu_thread_signal(env->thread, SIG_IPI);
+    if (!env->thread_kicked) {
+        qemu_thread_signal(env->thread, SIG_IPI);
+        env->thread_kicked = true;
+    }
+}
+
+void qemu_cpu_kick_self(void)
+{
+    assert(cpu_single_env);
+
+    if (!cpu_single_env->thread_kicked) {
+        qemu_thread_signal(cpu_single_env->thread, SIG_IPI);
+        cpu_single_env->thread_kicked = true;
+    }
 }
 
 int qemu_cpu_self(void *_env)
@@ -663,77 +898,6 @@ int qemu_cpu_self(void *_env)
     return qemu_thread_equal(&this, env->thread);
 }
 
-static void cpu_signal(int sig)
-{
-    if (cpu_single_env)
-        cpu_exit(cpu_single_env);
-    exit_request = 1;
-}
-
-static void tcg_init_ipi(void)
-{
-    sigset_t set;
-    struct sigaction sigact;
-
-    memset(&sigact, 0, sizeof(sigact));
-    sigact.sa_handler = cpu_signal;
-    sigaction(SIG_IPI, &sigact, NULL);
-
-    sigemptyset(&set);
-    sigaddset(&set, SIG_IPI);
-    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
-}
-
-static void dummy_signal(int sig)
-{
-}
-
-static void kvm_init_ipi(CPUState *env)
-{
-    int r;
-    sigset_t set;
-    struct sigaction sigact;
-
-    memset(&sigact, 0, sizeof(sigact));
-    sigact.sa_handler = dummy_signal;
-    sigaction(SIG_IPI, &sigact, NULL);
-
-    pthread_sigmask(SIG_BLOCK, NULL, &set);
-    sigdelset(&set, SIG_IPI);
-    sigdelset(&set, SIGBUS);
-    r = kvm_set_signal_mask(env, &set);
-    if (r) {
-        fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(r));
-        exit(1);
-    }
-}
-
-static sigset_t block_io_signals(void)
-{
-    sigset_t set;
-    struct sigaction action;
-
-    /* SIGUSR2 used by posix-aio-compat.c */
-    sigemptyset(&set);
-    sigaddset(&set, SIGUSR2);
-    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
-
-    sigemptyset(&set);
-    sigaddset(&set, SIGIO);
-    sigaddset(&set, SIGALRM);
-    sigaddset(&set, SIG_IPI);
-    sigaddset(&set, SIGBUS);
-    pthread_sigmask(SIG_BLOCK, &set, NULL);
-
-    memset(&action, 0, sizeof(action));
-    action.sa_flags = SA_SIGINFO;
-    action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler;
-    sigaction(SIGBUS, &action, NULL);
-    prctl(PR_MCE_KILL, 1, 1, 0, 0);
-
-    return set;
-}
-
 void qemu_mutex_lock_iothread(void)
 {
     if (kvm_enabled()) {
@@ -758,8 +922,9 @@ static int all_vcpus_paused(void)
     CPUState *penv = first_cpu;
 
     while (penv) {
-        if (!penv->stopped)
+        if (!penv->stopped) {
             return 0;
+        }
         penv = (CPUState *)penv->next_cpu;
     }
 
@@ -798,17 +963,19 @@ void resume_all_vcpus(void)
     }
 }
 
-static void tcg_init_vcpu(void *_env)
+static void qemu_tcg_init_vcpu(void *_env)
 {
     CPUState *env = _env;
+
     /* share a single thread for all cpus with TCG */
     if (!tcg_cpu_thread) {
         env->thread = qemu_mallocz(sizeof(QemuThread));
         env->halt_cond = qemu_mallocz(sizeof(QemuCond));
         qemu_cond_init(env->halt_cond);
-        qemu_thread_create(env->thread, tcg_cpu_thread_fn, env);
-        while (env->created == 0)
+        qemu_thread_create(env->thread, qemu_tcg_cpu_thread_fn, env);
+        while (env->created == 0) {
             qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100);
+        }
         tcg_cpu_thread = env->thread;
         tcg_halt_cond = env->halt_cond;
     } else {
@@ -817,14 +984,15 @@ static void tcg_init_vcpu(void *_env)
     }
 }
 
-static void kvm_start_vcpu(CPUState *env)
+static void qemu_kvm_start_vcpu(CPUState *env)
 {
     env->thread = qemu_mallocz(sizeof(QemuThread));
     env->halt_cond = qemu_mallocz(sizeof(QemuCond));
     qemu_cond_init(env->halt_cond);
-    qemu_thread_create(env->thread, kvm_cpu_thread_fn, env);
-    while (env->created == 0)
+    qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env);
+    while (env->created == 0) {
         qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100);
+    }
 }
 
 void qemu_init_vcpu(void *_env)
@@ -833,10 +1001,11 @@ void qemu_init_vcpu(void *_env)
 
     env->nr_cores = smp_cores;
     env->nr_threads = smp_threads;
-    if (kvm_enabled())
-        kvm_start_vcpu(env);
-    else
-        tcg_init_vcpu(env);
+    if (kvm_enabled()) {
+        qemu_kvm_start_vcpu(env);
+    } else {
+        qemu_tcg_init_vcpu(env);
+    }
 }
 
 void qemu_notify_event(void)
@@ -844,10 +1013,12 @@ void qemu_notify_event(void)
     qemu_event_increment();
 }
 
-static void qemu_system_vmstop_request(int reason)
+void cpu_stop_current(void)
 {
-    vmstop_requested = reason;
-    qemu_notify_event();
+    if (cpu_single_env) {
+        cpu_single_env->stopped = 1;
+        cpu_exit(cpu_single_env);
+    }
 }
 
 void vm_stop(int reason)
@@ -861,10 +1032,7 @@ void vm_stop(int reason)
          * FIXME: should not return to device code in case
          * vm_stop() has been requested.
          */
-        if (cpu_single_env) {
-            cpu_exit(cpu_single_env);
-            cpu_single_env->stop = 1;
-        }
+        cpu_stop_current();
         return;
     }
     do_vm_stop(reason);
@@ -872,7 +1040,7 @@ void vm_stop(int reason)
 
 #endif
 
-static int qemu_cpu_exec(CPUState *env)
+static int tcg_cpu_exec(CPUState *env)
 {
     int ret;
 #ifdef CONFIG_PROFILER
@@ -912,18 +1080,29 @@ static int qemu_cpu_exec(CPUState *env)
 
 bool cpu_exec_all(void)
 {
-    if (next_cpu == NULL)
+    int r;
+
+    if (next_cpu == NULL) {
         next_cpu = first_cpu;
+    }
     for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) {
         CPUState *env = next_cpu;
 
         qemu_clock_enable(vm_clock,
                           (env->singlestep_enabled & SSTEP_NOTIMER) == 0);
 
-        if (qemu_alarm_pending())
+        if (qemu_alarm_pending()) {
             break;
+        }
         if (cpu_can_run(env)) {
-            if (qemu_cpu_exec(env) == EXCP_DEBUG) {
+            if (kvm_enabled()) {
+                r = kvm_cpu_exec(env);
+                qemu_kvm_eat_signals(env);
+            } else {
+                r = tcg_cpu_exec(env);
+            }
+            if (r == EXCP_DEBUG) {
+                cpu_handle_debug_exception(env);
                 break;
             }
         } else if (env->stop) {
@@ -931,7 +1110,7 @@ bool cpu_exec_all(void)
         }
     }
     exit_request = 0;
-    return any_cpu_has_work();
+    return !all_cpu_threads_idle();
 }
 
 void set_numa_modes(void)
diff --git a/cpus.h b/cpus.h
index bf4d9bb87a..e0211260c3 100644
--- a/cpus.h
+++ b/cpus.h
@@ -6,12 +6,11 @@ int qemu_init_main_loop(void);
 void qemu_main_loop_start(void);
 void resume_all_vcpus(void);
 void pause_all_vcpus(void);
+void cpu_stop_current(void);
 
 /* vl.c */
 extern int smp_cores;
 extern int smp_threads;
-extern int debug_requested;
-extern int vmstop_requested;
 void vm_state_notify(int running, int reason);
 bool cpu_exec_all(void);
 void set_numa_modes(void);
diff --git a/exec.c b/exec.c
index 1b70dc1b5e..d611100dc3 100644
--- a/exec.c
+++ b/exec.c
@@ -2078,6 +2078,36 @@ int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr,
     return ret;
 }
 
+int cpu_physical_log_start(target_phys_addr_t start_addr,
+                           ram_addr_t size)
+{
+    CPUPhysMemoryClient *client;
+    QLIST_FOREACH(client, &memory_client_list, list) {
+        if (client->log_start) {
+            int r = client->log_start(client, start_addr, size);
+            if (r < 0) {
+                return r;
+            }
+        }
+    }
+    return 0;
+}
+
+int cpu_physical_log_stop(target_phys_addr_t start_addr,
+                          ram_addr_t size)
+{
+    CPUPhysMemoryClient *client;
+    QLIST_FOREACH(client, &memory_client_list, list) {
+        if (client->log_stop) {
+            int r = client->log_stop(client, start_addr, size);
+            if (r < 0) {
+                return r;
+            }
+        }
+    }
+    return 0;
+}
+
 static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry)
 {
     ram_addr_t ram_addr;
diff --git a/gdbstub.c b/gdbstub.c
index d6556c9a2f..ed51a8a5bb 100644
--- a/gdbstub.c
+++ b/gdbstub.c
@@ -2194,14 +2194,14 @@ static void gdb_vm_state_change(void *opaque, int running, int reason)
     const char *type;
     int ret;
 
-    if (running || (reason != EXCP_DEBUG && reason != EXCP_INTERRUPT) ||
-        s->state == RS_INACTIVE || s->state == RS_SYSCALL)
+    if (running || (reason != VMSTOP_DEBUG && reason != VMSTOP_USER) ||
+        s->state == RS_INACTIVE || s->state == RS_SYSCALL) {
         return;
-
+    }
     /* disable single step if it was enable */
     cpu_single_step(env, 0);
 
-    if (reason == EXCP_DEBUG) {
+    if (reason == VMSTOP_DEBUG) {
         if (env->watchpoint_hit) {
             switch (env->watchpoint_hit->flags & BP_MEM_ACCESS) {
             case BP_MEM_READ:
@@ -2252,7 +2252,7 @@ void gdb_do_syscall(gdb_syscall_complete_cb cb, const char *fmt, ...)
     gdb_current_syscall_cb = cb;
     s->state = RS_SYSCALL;
 #ifndef CONFIG_USER_ONLY
-    vm_stop(EXCP_DEBUG);
+    vm_stop(VMSTOP_DEBUG);
 #endif
     s->state = RS_IDLE;
     va_start(va, fmt);
@@ -2326,7 +2326,7 @@ static void gdb_read_byte(GDBState *s, int ch)
     if (vm_running) {
         /* when the CPU is running, we cannot do anything except stop
            it when receiving a char */
-        vm_stop(EXCP_INTERRUPT);
+        vm_stop(VMSTOP_USER);
     } else
 #endif
     {
@@ -2588,7 +2588,7 @@ static void gdb_chr_event(void *opaque, int event)
 {
     switch (event) {
     case CHR_EVENT_OPENED:
-        vm_stop(EXCP_INTERRUPT);
+        vm_stop(VMSTOP_USER);
         gdb_has_xml = 0;
         break;
     default:
@@ -2628,8 +2628,9 @@ static int gdb_monitor_write(CharDriverState *chr, const uint8_t *buf, int len)
 #ifndef _WIN32
 static void gdb_sigterm_handler(int signal)
 {
-    if (vm_running)
-        vm_stop(EXCP_INTERRUPT);
+    if (vm_running) {
+        vm_stop(VMSTOP_USER);
+    }
 }
 #endif
 
diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c
index 5f45b5dee7..2724f7b480 100644
--- a/hw/cirrus_vga.c
+++ b/hw/cirrus_vga.c
@@ -31,7 +31,6 @@
 #include "pci.h"
 #include "console.h"
 #include "vga_int.h"
-#include "kvm.h"
 #include "loader.h"
 
 /*
diff --git a/hw/ide/core.c b/hw/ide/core.c
index dd63664c0d..9c91a49767 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -465,7 +465,7 @@ static int ide_handle_rw_error(IDEState *s, int error, int op)
         s->bus->dma->ops->set_unit(s->bus->dma, s->unit);
         s->bus->dma->ops->add_status(s->bus->dma, op);
         bdrv_mon_event(s->bs, BDRV_ACTION_STOP, is_read);
-        vm_stop(0);
+        vm_stop(VMSTOP_DISKFULL);
     } else {
         if (op & BM_STATUS_DMA_RETRY) {
             dma_buf_commit(s, 0);
diff --git a/hw/kvmclock.c b/hw/kvmclock.c
new file mode 100644
index 0000000000..b6ceddfba6
--- /dev/null
+++ b/hw/kvmclock.c
@@ -0,0 +1,125 @@
+/*
+ * QEMU KVM support, paravirtual clock device
+ *
+ * Copyright (C) 2011 Siemens AG
+ *
+ * Authors:
+ *  Jan Kiszka        <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL version 2.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu-common.h"
+#include "sysemu.h"
+#include "sysbus.h"
+#include "kvm.h"
+#include "kvmclock.h"
+
+#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ADJUST_CLOCK)
+
+#include <linux/kvm.h>
+#include <linux/kvm_para.h>
+
+typedef struct KVMClockState {
+    SysBusDevice busdev;
+    uint64_t clock;
+    bool clock_valid;
+} KVMClockState;
+
+static void kvmclock_pre_save(void *opaque)
+{
+    KVMClockState *s = opaque;
+    struct kvm_clock_data data;
+    int ret;
+
+    if (s->clock_valid) {
+        return;
+    }
+    ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data);
+    if (ret < 0) {
+        fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret));
+        data.clock = 0;
+    }
+    s->clock = data.clock;
+    /*
+     * If the VM is stopped, declare the clock state valid to avoid re-reading
+     * it on next vmsave (which would return a different value). Will be reset
+     * when the VM is continued.
+     */
+    s->clock_valid = !vm_running;
+}
+
+static int kvmclock_post_load(void *opaque, int version_id)
+{
+    KVMClockState *s = opaque;
+    struct kvm_clock_data data;
+
+    data.clock = s->clock;
+    data.flags = 0;
+    return kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data);
+}
+
+static void kvmclock_vm_state_change(void *opaque, int running, int reason)
+{
+    KVMClockState *s = opaque;
+
+    if (running) {
+        s->clock_valid = false;
+    }
+}
+
+static int kvmclock_init(SysBusDevice *dev)
+{
+    KVMClockState *s = FROM_SYSBUS(KVMClockState, dev);
+
+    qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s);
+    return 0;
+}
+
+static const VMStateDescription kvmclock_vmsd = {
+    .name = "kvmclock",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .minimum_version_id_old = 1,
+    .pre_save = kvmclock_pre_save,
+    .post_load = kvmclock_post_load,
+    .fields = (VMStateField[]) {
+        VMSTATE_UINT64(clock, KVMClockState),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static SysBusDeviceInfo kvmclock_info = {
+    .qdev.name = "kvmclock",
+    .qdev.size = sizeof(KVMClockState),
+    .qdev.vmsd = &kvmclock_vmsd,
+    .qdev.no_user = 1,
+    .init = kvmclock_init,
+};
+
+/* Note: Must be called after VCPU initialization. */
+void kvmclock_create(void)
+{
+    if (kvm_enabled() &&
+        first_cpu->cpuid_kvm_features & (1ULL << KVM_FEATURE_CLOCKSOURCE)) {
+        sysbus_create_simple("kvmclock", -1, NULL);
+    }
+}
+
+static void kvmclock_register_device(void)
+{
+    if (kvm_enabled()) {
+        sysbus_register_withprop(&kvmclock_info);
+    }
+}
+
+device_init(kvmclock_register_device);
+
+#else /* !(CONFIG_KVM_PARA && KVM_CAP_ADJUST_CLOCK) */
+
+void kvmclock_create(void)
+{
+}
+#endif /* !(CONFIG_KVM_PARA && KVM_CAP_ADJUST_CLOCK) */
diff --git a/hw/kvmclock.h b/hw/kvmclock.h
new file mode 100644
index 0000000000..7a83cbe8f6
--- /dev/null
+++ b/hw/kvmclock.h
@@ -0,0 +1,14 @@
+/*
+ * QEMU KVM support, paravirtual clock device
+ *
+ * Copyright (C) 2011 Siemens AG
+ *
+ * Authors:
+ *  Jan Kiszka        <jan.kiszka@siemens.com>
+ *
+ * This work is licensed under the terms of the GNU GPL version 2.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+void kvmclock_create(void);
diff --git a/hw/pc_piix.c b/hw/pc_piix.c
index d0bd0cd1fa..291845478d 100644
--- a/hw/pc_piix.c
+++ b/hw/pc_piix.c
@@ -32,6 +32,7 @@
 #include "boards.h"
 #include "ide.h"
 #include "kvm.h"
+#include "kvmclock.h"
 #include "sysemu.h"
 #include "sysbus.h"
 #include "arch_init.h"
@@ -66,7 +67,8 @@ static void pc_init1(ram_addr_t ram_size,
                      const char *kernel_cmdline,
                      const char *initrd_filename,
                      const char *cpu_model,
-                     int pci_enabled)
+                     int pci_enabled,
+                     int kvmclock_enabled)
 {
     int i;
     ram_addr_t below_4g_mem_size, above_4g_mem_size;
@@ -86,6 +88,10 @@ static void pc_init1(ram_addr_t ram_size,
 
     pc_cpus_init(cpu_model);
 
+    if (kvmclock_enabled) {
+        kvmclock_create();
+    }
+
     /* allocate ram and load rom/bios */
     pc_memory_init(ram_size, kernel_filename, kernel_cmdline, initrd_filename,
                    &below_4g_mem_size, &above_4g_mem_size);
@@ -193,7 +199,19 @@ static void pc_init_pci(ram_addr_t ram_size,
 {
     pc_init1(ram_size, boot_device,
              kernel_filename, kernel_cmdline,
-             initrd_filename, cpu_model, 1);
+             initrd_filename, cpu_model, 1, 1);
+}
+
+static void pc_init_pci_no_kvmclock(ram_addr_t ram_size,
+                                    const char *boot_device,
+                                    const char *kernel_filename,
+                                    const char *kernel_cmdline,
+                                    const char *initrd_filename,
+                                    const char *cpu_model)
+{
+    pc_init1(ram_size, boot_device,
+             kernel_filename, kernel_cmdline,
+             initrd_filename, cpu_model, 1, 0);
 }
 
 static void pc_init_isa(ram_addr_t ram_size,
@@ -207,7 +225,7 @@ static void pc_init_isa(ram_addr_t ram_size,
         cpu_model = "486";
     pc_init1(ram_size, boot_device,
              kernel_filename, kernel_cmdline,
-             initrd_filename, cpu_model, 0);
+             initrd_filename, cpu_model, 0, 1);
 }
 
 static QEMUMachine pc_machine = {
@@ -222,7 +240,7 @@ static QEMUMachine pc_machine = {
 static QEMUMachine pc_machine_v0_13 = {
     .name = "pc-0.13",
     .desc = "Standard PC",
-    .init = pc_init_pci,
+    .init = pc_init_pci_no_kvmclock,
     .max_cpus = 255,
     .compat_props = (GlobalProperty[]) {
         {
@@ -249,7 +267,7 @@ static QEMUMachine pc_machine_v0_13 = {
 static QEMUMachine pc_machine_v0_12 = {
     .name = "pc-0.12",
     .desc = "Standard PC",
-    .init = pc_init_pci,
+    .init = pc_init_pci_no_kvmclock,
     .max_cpus = 255,
     .compat_props = (GlobalProperty[]) {
         {
@@ -280,7 +298,7 @@ static QEMUMachine pc_machine_v0_12 = {
 static QEMUMachine pc_machine_v0_11 = {
     .name = "pc-0.11",
     .desc = "Standard PC, qemu 0.11",
-    .init = pc_init_pci,
+    .init = pc_init_pci_no_kvmclock,
     .max_cpus = 255,
     .compat_props = (GlobalProperty[]) {
         {
@@ -319,7 +337,7 @@ static QEMUMachine pc_machine_v0_11 = {
 static QEMUMachine pc_machine_v0_10 = {
     .name = "pc-0.10",
     .desc = "Standard PC, qemu 0.10",
-    .init = pc_init_pci,
+    .init = pc_init_pci_no_kvmclock,
     .max_cpus = 255,
     .compat_props = (GlobalProperty[]) {
         {
diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c
index 488eedd2cd..b05e6547df 100644
--- a/hw/scsi-disk.c
+++ b/hw/scsi-disk.c
@@ -239,7 +239,7 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type)
         r->status |= SCSI_REQ_STATUS_RETRY | type;
 
         bdrv_mon_event(s->bs, BDRV_ACTION_STOP, is_read);
-        vm_stop(0);
+        vm_stop(VMSTOP_DISKFULL);
     } else {
         if (type == SCSI_REQ_STATUS_RETRY_READ) {
             r->req.bus->complete(r->req.bus, SCSI_REASON_DATA, r->req.tag, 0);
diff --git a/hw/vga.c b/hw/vga.c
index e2151a2458..c22b8af833 100644
--- a/hw/vga.c
+++ b/hw/vga.c
@@ -28,7 +28,6 @@
 #include "vga_int.h"
 #include "pixel_ops.h"
 #include "qemu-timer.h"
-#include "kvm.h"
 
 //#define DEBUG_VGA
 //#define DEBUG_VGA_MEM
@@ -1573,34 +1572,36 @@ static void vga_sync_dirty_bitmap(VGACommonState *s)
 
 void vga_dirty_log_start(VGACommonState *s)
 {
-    if (kvm_enabled() && s->map_addr)
-        kvm_log_start(s->map_addr, s->map_end - s->map_addr);
+    if (s->map_addr) {
+        cpu_physical_log_start(s->map_addr, s->map_end - s->map_addr);
+    }
 
-    if (kvm_enabled() && s->lfb_vram_mapped) {
-        kvm_log_start(isa_mem_base + 0xa0000, 0x8000);
-        kvm_log_start(isa_mem_base + 0xa8000, 0x8000);
+    if (s->lfb_vram_mapped) {
+        cpu_physical_log_start(isa_mem_base + 0xa0000, 0x8000);
+        cpu_physical_log_start(isa_mem_base + 0xa8000, 0x8000);
     }
 
 #ifdef CONFIG_BOCHS_VBE
-    if (kvm_enabled() && s->vbe_mapped) {
-        kvm_log_start(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size);
+    if (s->vbe_mapped) {
+        cpu_physical_log_start(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size);
     }
 #endif
 }
 
 void vga_dirty_log_stop(VGACommonState *s)
 {
-    if (kvm_enabled() && s->map_addr)
-	kvm_log_stop(s->map_addr, s->map_end - s->map_addr);
+    if (s->map_addr) {
+        cpu_physical_log_stop(s->map_addr, s->map_end - s->map_addr);
+    }
 
-    if (kvm_enabled() && s->lfb_vram_mapped) {
-	kvm_log_stop(isa_mem_base + 0xa0000, 0x8000);
-	kvm_log_stop(isa_mem_base + 0xa8000, 0x8000);
+    if (s->lfb_vram_mapped) {
+        cpu_physical_log_stop(isa_mem_base + 0xa0000, 0x8000);
+        cpu_physical_log_stop(isa_mem_base + 0xa8000, 0x8000);
     }
 
 #ifdef CONFIG_BOCHS_VBE
-    if (kvm_enabled() && s->vbe_mapped) {
-	kvm_log_stop(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size);
+    if (s->vbe_mapped) {
+        cpu_physical_log_stop(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size);
     }
 #endif
 }
diff --git a/hw/vhost.c b/hw/vhost.c
index 38cc3b365b..0ca3507f44 100644
--- a/hw/vhost.c
+++ b/hw/vhost.c
@@ -607,6 +607,8 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force)
     hdev->client.set_memory = vhost_client_set_memory;
     hdev->client.sync_dirty_bitmap = vhost_client_sync_dirty_bitmap;
     hdev->client.migration_log = vhost_client_migration_log;
+    hdev->client.log_start = NULL;
+    hdev->client.log_stop = NULL;
     hdev->mem = qemu_mallocz(offsetof(struct vhost_memory, regions));
     hdev->log = NULL;
     hdev->log_size = 0;
diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c
index ffac5a4d8f..b14fb995e8 100644
--- a/hw/virtio-blk.c
+++ b/hw/virtio-blk.c
@@ -78,7 +78,7 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error,
         req->next = s->rq;
         s->rq = req;
         bdrv_mon_event(s->bs, BDRV_ACTION_STOP, is_read);
-        vm_stop(0);
+        vm_stop(VMSTOP_DISKFULL);
     } else {
         virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
         bdrv_mon_event(s->bs, BDRV_ACTION_REPORT, is_read);
diff --git a/hw/watchdog.c b/hw/watchdog.c
index e9dd56e229..1c900a1189 100644
--- a/hw/watchdog.c
+++ b/hw/watchdog.c
@@ -132,7 +132,7 @@ void watchdog_perform_action(void)
 
     case WDT_PAUSE:             /* same as 'stop' command in monitor */
         watchdog_mon_event("pause");
-        vm_stop(0);
+        vm_stop(VMSTOP_WATCHDOG);
         break;
 
     case WDT_DEBUG:
diff --git a/kvm-all.c b/kvm-all.c
index 2ec9e0980d..e6a7de4722 100644
--- a/kvm-all.c
+++ b/kvm-all.c
@@ -78,7 +78,7 @@ struct KVMState
     int many_ioeventfds;
 };
 
-static KVMState *kvm_state;
+KVMState *kvm_state;
 
 static const KVMCapabilityInfo kvm_required_capabilites[] = {
     KVM_CAP_INFO(USER_MEMORY),
@@ -91,10 +91,6 @@ static KVMSlot *kvm_alloc_slot(KVMState *s)
     int i;
 
     for (i = 0; i < ARRAY_SIZE(s->slots); i++) {
-        /* KVM private memory slots */
-        if (i >= 8 && i < 12) {
-            continue;
-        }
         if (s->slots[i].memory_size == 0) {
             return &s->slots[i];
         }
@@ -199,7 +195,6 @@ int kvm_pit_in_kernel(void)
     return kvm_state->pit_in_kernel;
 }
 
-
 int kvm_init_vcpu(CPUState *env)
 {
     KVMState *s = kvm_state;
@@ -219,6 +214,7 @@ int kvm_init_vcpu(CPUState *env)
 
     mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
     if (mmap_size < 0) {
+        ret = mmap_size;
         DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
         goto err;
     }
@@ -278,13 +274,15 @@ static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr,
     return kvm_set_user_memory_region(s, mem);
 }
 
-int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
+static int kvm_log_start(CPUPhysMemoryClient *client,
+                         target_phys_addr_t phys_addr, ram_addr_t size)
 {
     return kvm_dirty_pages_log_change(phys_addr, size, KVM_MEM_LOG_DIRTY_PAGES,
                                       KVM_MEM_LOG_DIRTY_PAGES);
 }
 
-int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
+static int kvm_log_stop(CPUPhysMemoryClient *client,
+                        target_phys_addr_t phys_addr, ram_addr_t size)
 {
     return kvm_dirty_pages_log_change(phys_addr, size, 0,
                                       KVM_MEM_LOG_DIRTY_PAGES);
@@ -648,6 +646,8 @@ static CPUPhysMemoryClient kvm_cpu_phys_memory_client = {
     .set_memory = kvm_client_set_memory,
     .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap,
     .migration_log = kvm_client_migration_log,
+    .log_start = kvm_log_start,
+    .log_stop = kvm_log_stop,
 };
 
 int kvm_init(void)
@@ -774,8 +774,8 @@ err:
     return ret;
 }
 
-static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
-                         uint32_t count)
+static void kvm_handle_io(uint16_t port, void *data, int direction, int size,
+                          uint32_t count)
 {
     int i;
     uint8_t *ptr = data;
@@ -809,8 +809,6 @@ static int kvm_handle_io(uint16_t port, void *data, int direction, int size,
 
         ptr += size;
     }
-
-    return 1;
 }
 
 #ifdef KVM_CAP_INTERNAL_ERROR_DATA
@@ -895,29 +893,34 @@ int kvm_cpu_exec(CPUState *env)
 
     DPRINTF("kvm_cpu_exec()\n");
 
+    if (kvm_arch_process_irqchip_events(env)) {
+        env->exit_request = 0;
+        return EXCP_HLT;
+    }
+
+    cpu_single_env = env;
+
     do {
-#ifndef CONFIG_IOTHREAD
-        if (env->exit_request) {
-            DPRINTF("interrupt exit requested\n");
-            ret = 0;
-            break;
-        }
-#endif
-
-        if (kvm_arch_process_irqchip_events(env)) {
-            ret = 0;
-            break;
-        }
-
         if (env->kvm_vcpu_dirty) {
             kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE);
             env->kvm_vcpu_dirty = 0;
         }
 
         kvm_arch_pre_run(env, run);
+        if (env->exit_request) {
+            DPRINTF("interrupt exit requested\n");
+            /*
+             * KVM requires us to reenter the kernel after IO exits to complete
+             * instruction emulation. This self-signal will ensure that we
+             * leave ASAP again.
+             */
+            qemu_cpu_kick_self();
+        }
         cpu_single_env = NULL;
         qemu_mutex_unlock_iothread();
+
         ret = kvm_vcpu_ioctl(env, KVM_RUN, 0);
+
         qemu_mutex_lock_iothread();
         cpu_single_env = env;
         kvm_arch_post_run(env, run);
@@ -925,7 +928,6 @@ int kvm_cpu_exec(CPUState *env)
         kvm_flush_coalesced_mmio_buffer();
 
         if (ret == -EINTR || ret == -EAGAIN) {
-            cpu_exit(env);
             DPRINTF("io window exit\n");
             ret = 0;
             break;
@@ -940,11 +942,12 @@ int kvm_cpu_exec(CPUState *env)
         switch (run->exit_reason) {
         case KVM_EXIT_IO:
             DPRINTF("handle_io\n");
-            ret = kvm_handle_io(run->io.port,
-                                (uint8_t *)run + run->io.data_offset,
-                                run->io.direction,
-                                run->io.size,
-                                run->io.count);
+            kvm_handle_io(run->io.port,
+                          (uint8_t *)run + run->io.data_offset,
+                          run->io.direction,
+                          run->io.size,
+                          run->io.count);
+            ret = 1;
             break;
         case KVM_EXIT_MMIO:
             DPRINTF("handle_mmio\n");
@@ -960,7 +963,6 @@ int kvm_cpu_exec(CPUState *env)
         case KVM_EXIT_SHUTDOWN:
             DPRINTF("shutdown\n");
             qemu_system_reset_request();
-            ret = 1;
             break;
         case KVM_EXIT_UNKNOWN:
             fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n",
@@ -976,8 +978,8 @@ int kvm_cpu_exec(CPUState *env)
             DPRINTF("kvm_exit_debug\n");
 #ifdef KVM_CAP_SET_GUEST_DEBUG
             if (kvm_arch_debug(&run->debug.arch)) {
-                env->exception_index = EXCP_DEBUG;
-                return 0;
+                ret = EXCP_DEBUG;
+                goto out;
             }
             /* re-enter, this exception was guest-internal */
             ret = 1;
@@ -992,14 +994,13 @@ int kvm_cpu_exec(CPUState *env)
 
     if (ret < 0) {
         cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE);
-        vm_stop(0);
-        env->exit_request = 1;
-    }
-    if (env->exit_request) {
-        env->exit_request = 0;
-        env->exception_index = EXCP_INTERRUPT;
+        vm_stop(VMSTOP_PANIC);
     }
+    ret = EXCP_INTERRUPT;
 
+out:
+    env->exit_request = 0;
+    cpu_single_env = NULL;
     return ret;
 }
 
@@ -1365,3 +1366,13 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign)
     return -ENOSYS;
 #endif
 }
+
+int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
+{
+    return kvm_arch_on_sigbus_vcpu(env, code, addr);
+}
+
+int kvm_on_sigbus(int code, void *addr)
+{
+    return kvm_arch_on_sigbus(code, addr);
+}
diff --git a/kvm-stub.c b/kvm-stub.c
index 88682f288b..30f6ec3956 100644
--- a/kvm-stub.c
+++ b/kvm-stub.c
@@ -33,16 +33,6 @@ int kvm_init_vcpu(CPUState *env)
     return -ENOSYS;
 }
 
-int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size)
-{
-    return -ENOSYS;
-}
-
-int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size)
-{
-    return -ENOSYS;
-}
-
 int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size)
 {
     return -ENOSYS;
@@ -147,6 +137,11 @@ int kvm_set_ioeventfd_mmio_long(int fd, uint32_t adr, uint32_t val, bool assign)
     return -ENOSYS;
 }
 
+int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
+{
+    return 1;
+}
+
 int kvm_on_sigbus(int code, void *addr)
 {
     return 1;
diff --git a/kvm.h b/kvm.h
index ca57517af2..59b2c29fd9 100644
--- a/kvm.h
+++ b/kvm.h
@@ -58,9 +58,6 @@ int kvm_init_vcpu(CPUState *env);
 int kvm_cpu_exec(CPUState *env);
 
 #if !defined(CONFIG_USER_ONLY)
-int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size);
-int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size);
-
 void kvm_setup_guest_memory(void *start, size_t size);
 
 int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size);
@@ -81,10 +78,14 @@ int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset);
 int kvm_pit_in_kernel(void);
 int kvm_irqchip_in_kernel(void);
 
+int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr);
+int kvm_on_sigbus(int code, void *addr);
+
 /* internal API */
 
 struct KVMState;
 typedef struct KVMState KVMState;
+extern KVMState *kvm_state;
 
 int kvm_ioctl(KVMState *s, int type, ...);
 
@@ -96,12 +97,11 @@ int kvm_vcpu_ioctl(CPUState *env, int type, ...);
 
 extern const KVMCapabilityInfo kvm_arch_required_capabilities[];
 
-int kvm_arch_post_run(CPUState *env, struct kvm_run *run);
+void kvm_arch_pre_run(CPUState *env, struct kvm_run *run);
+void kvm_arch_post_run(CPUState *env, struct kvm_run *run);
 
 int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run);
 
-int kvm_arch_pre_run(CPUState *env, struct kvm_run *run);
-
 int kvm_arch_process_irqchip_events(CPUState *env);
 
 int kvm_arch_get_registers(CPUState *env);
@@ -121,8 +121,8 @@ int kvm_arch_init_vcpu(CPUState *env);
 
 void kvm_arch_reset_vcpu(CPUState *env);
 
-int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr);
-int kvm_on_sigbus(int code, void *addr);
+int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr);
+int kvm_arch_on_sigbus(int code, void *addr);
 
 struct kvm_guest_debug;
 struct kvm_debug_exit_arch;
diff --git a/migration.c b/migration.c
index 36125720a4..af3a1f2702 100644
--- a/migration.c
+++ b/migration.c
@@ -378,7 +378,7 @@ void migrate_fd_put_ready(void *opaque)
         int old_vm_running = vm_running;
 
         DPRINTF("done iterating\n");
-        vm_stop(0);
+        vm_stop(VMSTOP_MIGRATE);
 
         if ((qemu_savevm_state_complete(s->mon, s->file)) < 0) {
             if (old_vm_running) {
diff --git a/monitor.c b/monitor.c
index 7fc311d720..22ae3bbff0 100644
--- a/monitor.c
+++ b/monitor.c
@@ -1255,7 +1255,7 @@ static void do_singlestep(Monitor *mon, const QDict *qdict)
  */
 static int do_stop(Monitor *mon, const QDict *qdict, QObject **ret_data)
 {
-    vm_stop(EXCP_INTERRUPT);
+    vm_stop(VMSTOP_USER);
     return 0;
 }
 
@@ -2783,7 +2783,7 @@ static void do_loadvm(Monitor *mon, const QDict *qdict)
     int saved_vm_running  = vm_running;
     const char *name = qdict_get_str(qdict, "name");
 
-    vm_stop(0);
+    vm_stop(VMSTOP_LOADVM);
 
     if (load_vmstate(name) == 0 && saved_vm_running) {
         vm_start();
diff --git a/qemu-common.h b/qemu-common.h
index c7ff280b95..a4d9c21a39 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -288,6 +288,7 @@ void qemu_notify_event(void);
 
 /* Unblock cpu */
 void qemu_cpu_kick(void *env);
+void qemu_cpu_kick_self(void);
 int qemu_cpu_self(void *env);
 
 /* work queue */
diff --git a/savevm.c b/savevm.c
index 6d83b0f4dc..a50fd31154 100644
--- a/savevm.c
+++ b/savevm.c
@@ -1575,7 +1575,7 @@ static int qemu_savevm_state(Monitor *mon, QEMUFile *f)
     int ret;
 
     saved_vm_running = vm_running;
-    vm_stop(0);
+    vm_stop(VMSTOP_SAVEVM);
 
     if (qemu_savevm_state_blocked(mon)) {
         ret = -EINVAL;
@@ -1904,7 +1904,7 @@ void do_savevm(Monitor *mon, const QDict *qdict)
     }
 
     saved_vm_running = vm_running;
-    vm_stop(0);
+    vm_stop(VMSTOP_SAVEVM);
 
     memset(sn, 0, sizeof(*sn));
 
diff --git a/sysemu.h b/sysemu.h
index 23ae17e2e9..0a83ab9e56 100644
--- a/sysemu.h
+++ b/sysemu.h
@@ -37,6 +37,16 @@ VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
                                                      void *opaque);
 void qemu_del_vm_change_state_handler(VMChangeStateEntry *e);
 
+#define VMSTOP_USER      0
+#define VMSTOP_DEBUG     1
+#define VMSTOP_SHUTDOWN  2
+#define VMSTOP_DISKFULL  3
+#define VMSTOP_WATCHDOG  4
+#define VMSTOP_PANIC     5
+#define VMSTOP_SAVEVM    6
+#define VMSTOP_LOADVM    7
+#define VMSTOP_MIGRATE   8
+
 void vm_start(void);
 void vm_stop(int reason);
 
@@ -51,6 +61,8 @@ void cpu_disable_ticks(void);
 void qemu_system_reset_request(void);
 void qemu_system_shutdown_request(void);
 void qemu_system_powerdown_request(void);
+void qemu_system_debug_request(void);
+void qemu_system_vmstop_request(int reason);
 int qemu_shutdown_requested(void);
 int qemu_reset_requested(void);
 int qemu_powerdown_requested(void);
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index af701a4412..5f1df8b4d3 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -734,6 +734,7 @@ typedef struct CPUX86State {
     uint32_t sipi_vector;
     uint32_t cpuid_kvm_features;
     uint32_t cpuid_svm_features;
+    bool tsc_valid;
     
     /* in order to simplify APIC support, we leave this pointer to the
        user */
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index 05010bbc38..0aa0a410da 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -301,6 +301,15 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status,
 #endif
 }
 
+static void cpu_update_state(void *opaque, int running, int reason)
+{
+    CPUState *env = opaque;
+
+    if (running) {
+        env->tsc_valid = false;
+    }
+}
+
 int kvm_arch_init_vcpu(CPUState *env)
 {
     struct {
@@ -434,6 +443,8 @@ int kvm_arch_init_vcpu(CPUState *env)
     }
 #endif
 
+    qemu_add_vm_change_state_handler(cpu_update_state, env);
+
     return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data);
 }
 
@@ -1061,7 +1072,12 @@ static int kvm_get_msrs(CPUState *env)
     if (has_msr_hsave_pa) {
         msrs[n++].index = MSR_VM_HSAVE_PA;
     }
-    msrs[n++].index = MSR_IA32_TSC;
+
+    if (!env->tsc_valid) {
+        msrs[n++].index = MSR_IA32_TSC;
+        env->tsc_valid = !vm_running;
+    }
+
 #ifdef TARGET_X86_64
     if (lm_capable_kernel) {
         msrs[n++].index = MSR_CSTAR;
@@ -1424,49 +1440,65 @@ int kvm_arch_get_registers(CPUState *env)
     return 0;
 }
 
-int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
+void kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
 {
+    int ret;
+
     /* Inject NMI */
     if (env->interrupt_request & CPU_INTERRUPT_NMI) {
         env->interrupt_request &= ~CPU_INTERRUPT_NMI;
         DPRINTF("injected NMI\n");
-        kvm_vcpu_ioctl(env, KVM_NMI);
-    }
-
-    /* Try to inject an interrupt if the guest can accept it */
-    if (run->ready_for_interrupt_injection &&
-        (env->interrupt_request & CPU_INTERRUPT_HARD) &&
-        (env->eflags & IF_MASK)) {
-        int irq;
-
-        env->interrupt_request &= ~CPU_INTERRUPT_HARD;
-        irq = cpu_get_pic_interrupt(env);
-        if (irq >= 0) {
-            struct kvm_interrupt intr;
-            intr.irq = irq;
-            /* FIXME: errors */
-            DPRINTF("injected interrupt %d\n", irq);
-            kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
+        ret = kvm_vcpu_ioctl(env, KVM_NMI);
+        if (ret < 0) {
+            fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n",
+                    strerror(-ret));
         }
     }
 
-    /* If we have an interrupt but the guest is not ready to receive an
-     * interrupt, request an interrupt window exit.  This will
-     * cause a return to userspace as soon as the guest is ready to
-     * receive interrupts. */
-    if ((env->interrupt_request & CPU_INTERRUPT_HARD)) {
-        run->request_interrupt_window = 1;
-    } else {
-        run->request_interrupt_window = 0;
+    if (!kvm_irqchip_in_kernel()) {
+        /* Force the VCPU out of its inner loop to process the INIT request */
+        if (env->interrupt_request & CPU_INTERRUPT_INIT) {
+            env->exit_request = 1;
+        }
+
+        /* Try to inject an interrupt if the guest can accept it */
+        if (run->ready_for_interrupt_injection &&
+            (env->interrupt_request & CPU_INTERRUPT_HARD) &&
+            (env->eflags & IF_MASK)) {
+            int irq;
+
+            env->interrupt_request &= ~CPU_INTERRUPT_HARD;
+            irq = cpu_get_pic_interrupt(env);
+            if (irq >= 0) {
+                struct kvm_interrupt intr;
+
+                intr.irq = irq;
+                DPRINTF("injected interrupt %d\n", irq);
+                ret = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr);
+                if (ret < 0) {
+                    fprintf(stderr,
+                            "KVM: injection failed, interrupt lost (%s)\n",
+                            strerror(-ret));
+                }
+            }
+        }
+
+        /* If we have an interrupt but the guest is not ready to receive an
+         * interrupt, request an interrupt window exit.  This will
+         * cause a return to userspace as soon as the guest is ready to
+         * receive interrupts. */
+        if ((env->interrupt_request & CPU_INTERRUPT_HARD)) {
+            run->request_interrupt_window = 1;
+        } else {
+            run->request_interrupt_window = 0;
+        }
+
+        DPRINTF("setting tpr\n");
+        run->cr8 = cpu_get_apic_tpr(env->apic_state);
     }
-
-    DPRINTF("setting tpr\n");
-    run->cr8 = cpu_get_apic_tpr(env->apic_state);
-
-    return 0;
 }
 
-int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
+void kvm_arch_post_run(CPUState *env, struct kvm_run *run)
 {
     if (run->if_flag) {
         env->eflags |= IF_MASK;
@@ -1475,18 +1507,21 @@ int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
     }
     cpu_set_apic_tpr(env->apic_state, run->cr8);
     cpu_set_apic_base(env->apic_state, run->apic_base);
-
-    return 0;
 }
 
 int kvm_arch_process_irqchip_events(CPUState *env)
 {
+    if (kvm_irqchip_in_kernel()) {
+        return 0;
+    }
+
+    if (env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI)) {
+        env->halted = 0;
+    }
     if (env->interrupt_request & CPU_INTERRUPT_INIT) {
         kvm_cpu_synchronize_state(env);
         do_cpu_init(env);
-        env->exception_index = EXCP_HALTED;
     }
-
     if (env->interrupt_request & CPU_INTERRUPT_SIPI) {
         kvm_cpu_synchronize_state(env);
         do_cpu_sipi(env);
@@ -1501,7 +1536,6 @@ static int kvm_handle_halt(CPUState *env)
           (env->eflags & IF_MASK)) &&
         !(env->interrupt_request & CPU_INTERRUPT_NMI)) {
         env->halted = 1;
-        env->exception_index = EXCP_HLT;
         return 0;
     }
 
@@ -1839,7 +1873,7 @@ static void kvm_mce_inj_srao_memscrub2(CPUState *env, target_phys_addr_t paddr)
 
 #endif
 
-int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
+int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr)
 {
 #if defined(KVM_CAP_MCE)
     void *vaddr;
@@ -1889,7 +1923,7 @@ int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
     return 0;
 }
 
-int kvm_on_sigbus(int code, void *addr)
+int kvm_arch_on_sigbus(int code, void *addr)
 {
 #if defined(KVM_CAP_MCE)
     if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) {
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index 710eca1dca..bd4012a4ec 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -256,14 +256,12 @@ int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
     return 0;
 }
 
-int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
+void kvm_arch_post_run(CPUState *env, struct kvm_run *run)
 {
-    return 0;
 }
 
-int kvm_arch_process_irqchip_events(CPUState *env)
+void kvm_arch_process_irqchip_events(CPUState *env)
 {
-    return 0;
 }
 
 static int kvmppc_handle_halt(CPUState *env)
@@ -404,3 +402,13 @@ bool kvm_arch_stop_on_emulation_error(CPUState *env)
 {
     return true;
 }
+
+int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr)
+{
+    return 1;
+}
+
+int kvm_arch_on_sigbus(int code, void *addr)
+{
+    return 1;
+}
diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c
index 38823f54f7..b349812dba 100644
--- a/target-s390x/kvm.c
+++ b/target-s390x/kvm.c
@@ -169,14 +169,12 @@ int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp)
     return 0;
 }
 
-int kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
+void kvm_arch_pre_run(CPUState *env, struct kvm_run *run)
 {
-    return 0;
 }
 
-int kvm_arch_post_run(CPUState *env, struct kvm_run *run)
+void kvm_arch_post_run(CPUState *env, struct kvm_run *run)
 {
-    return 0;
 }
 
 int kvm_arch_process_irqchip_events(CPUState *env)
@@ -505,3 +503,13 @@ bool kvm_arch_stop_on_emulation_error(CPUState *env)
 {
     return true;
 }
+
+int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr)
+{
+    return 1;
+}
+
+int kvm_arch_on_sigbus(int code, void *addr)
+{
+    return 1;
+}
diff --git a/vl.c b/vl.c
index ed2cdfae42..b436952d6c 100644
--- a/vl.c
+++ b/vl.c
@@ -1217,8 +1217,8 @@ static QTAILQ_HEAD(reset_handlers, QEMUResetEntry) reset_handlers =
 static int reset_requested;
 static int shutdown_requested;
 static int powerdown_requested;
-int debug_requested;
-int vmstop_requested;
+static int debug_requested;
+static int vmstop_requested;
 
 int qemu_shutdown_requested(void)
 {
@@ -1296,6 +1296,7 @@ void qemu_system_reset_request(void)
     } else {
         reset_requested = 1;
     }
+    cpu_stop_current();
     qemu_notify_event();
 }
 
@@ -1311,6 +1312,18 @@ void qemu_system_powerdown_request(void)
     qemu_notify_event();
 }
 
+void qemu_system_debug_request(void)
+{
+    debug_requested = 1;
+    qemu_notify_event();
+}
+
+void qemu_system_vmstop_request(int reason)
+{
+    vmstop_requested = reason;
+    qemu_notify_event();
+}
+
 void main_loop_wait(int nonblocking)
 {
     IOHandlerRecord *ioh;
@@ -1388,52 +1401,51 @@ void main_loop_wait(int nonblocking)
 
 }
 
-static int vm_can_run(void)
+#ifndef CONFIG_IOTHREAD
+static int vm_request_pending(void)
 {
-    if (powerdown_requested)
-        return 0;
-    if (reset_requested)
-        return 0;
-    if (shutdown_requested)
-        return 0;
-    if (debug_requested)
-        return 0;
-    return 1;
+    return powerdown_requested ||
+           reset_requested ||
+           shutdown_requested ||
+           debug_requested ||
+           vmstop_requested;
 }
+#endif
 
 qemu_irq qemu_system_powerdown;
 
 static void main_loop(void)
 {
+    bool nonblocking = false;
+#ifdef CONFIG_PROFILER
+    int64_t ti;
+#endif
     int r;
 
     qemu_main_loop_start();
 
     for (;;) {
-        do {
-            bool nonblocking = false;
-#ifdef CONFIG_PROFILER
-            int64_t ti;
-#endif
 #ifndef CONFIG_IOTHREAD
-            nonblocking = cpu_exec_all();
+        nonblocking = cpu_exec_all();
+        if (vm_request_pending()) {
+            nonblocking = true;
+        }
 #endif
 #ifdef CONFIG_PROFILER
-            ti = profile_getclock();
+        ti = profile_getclock();
 #endif
-            main_loop_wait(nonblocking);
+        main_loop_wait(nonblocking);
 #ifdef CONFIG_PROFILER
-            dev_time += profile_getclock() - ti;
+        dev_time += profile_getclock() - ti;
 #endif
-        } while (vm_can_run());
 
-        if ((r = qemu_debug_requested())) {
-            vm_stop(r);
+        if (qemu_debug_requested()) {
+            vm_stop(VMSTOP_DEBUG);
         }
         if (qemu_shutdown_requested()) {
             monitor_protocol_event(QEVENT_SHUTDOWN, NULL);
             if (no_shutdown) {
-                vm_stop(0);
+                vm_stop(VMSTOP_SHUTDOWN);
                 no_shutdown = 0;
             } else
                 break;