2008-09-23 03:17:18 +08:00
|
|
|
/*
|
|
|
|
* QEMU aio implementation
|
|
|
|
*
|
|
|
|
* Copyright IBM, Corp. 2008
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Anthony Liguori <aliguori@us.ibm.com>
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2. See
|
|
|
|
* the COPYING file in the top-level directory.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef QEMU_AIO_H
|
|
|
|
#define QEMU_AIO_H
|
|
|
|
|
2013-08-21 23:02:47 +08:00
|
|
|
#include "qemu/typedefs.h"
|
2008-09-23 03:17:18 +08:00
|
|
|
#include "qemu-common.h"
|
2012-12-18 01:20:00 +08:00
|
|
|
#include "qemu/queue.h"
|
|
|
|
#include "qemu/event_notifier.h"
|
2013-07-16 12:28:58 +08:00
|
|
|
#include "qemu/thread.h"
|
2014-03-03 18:30:04 +08:00
|
|
|
#include "qemu/rfifolock.h"
|
2013-08-21 23:02:49 +08:00
|
|
|
#include "qemu/timer.h"
|
2008-09-23 03:17:18 +08:00
|
|
|
|
2012-03-13 00:01:48 +08:00
|
|
|
typedef struct BlockDriverAIOCB BlockDriverAIOCB;
|
|
|
|
typedef void BlockDriverCompletionFunc(void *opaque, int ret);
|
|
|
|
|
2012-10-31 23:34:37 +08:00
|
|
|
typedef struct AIOCBInfo {
|
2014-09-11 13:41:09 +08:00
|
|
|
void (*cancel_async)(BlockDriverAIOCB *acb);
|
|
|
|
AioContext *(*get_aio_context)(BlockDriverAIOCB *acb);
|
2012-10-31 23:34:35 +08:00
|
|
|
size_t aiocb_size;
|
2012-10-31 23:34:37 +08:00
|
|
|
} AIOCBInfo;
|
2012-03-13 00:01:48 +08:00
|
|
|
|
|
|
|
struct BlockDriverAIOCB {
|
2012-10-31 23:34:37 +08:00
|
|
|
const AIOCBInfo *aiocb_info;
|
2012-03-13 00:01:48 +08:00
|
|
|
BlockDriverState *bs;
|
|
|
|
BlockDriverCompletionFunc *cb;
|
|
|
|
void *opaque;
|
2014-09-11 13:41:08 +08:00
|
|
|
int refcnt;
|
2012-03-13 00:01:48 +08:00
|
|
|
};
|
|
|
|
|
2012-10-31 23:34:37 +08:00
|
|
|
void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs,
|
2012-03-13 00:01:48 +08:00
|
|
|
BlockDriverCompletionFunc *cb, void *opaque);
|
|
|
|
void qemu_aio_release(void *p);
|
2014-09-11 13:41:08 +08:00
|
|
|
void qemu_aio_ref(void *p);
|
2012-03-13 00:01:48 +08:00
|
|
|
|
2012-10-30 06:45:23 +08:00
|
|
|
typedef struct AioHandler AioHandler;
|
|
|
|
typedef void QEMUBHFunc(void *opaque);
|
|
|
|
typedef void IOHandler(void *opaque);
|
|
|
|
|
2013-08-21 23:02:47 +08:00
|
|
|
struct AioContext {
|
2012-09-24 20:57:41 +08:00
|
|
|
GSource source;
|
|
|
|
|
2014-03-03 18:30:04 +08:00
|
|
|
/* Protects all fields from multi-threaded access */
|
|
|
|
RFifoLock lock;
|
|
|
|
|
2012-09-13 18:28:51 +08:00
|
|
|
/* The list of registered AIO handlers */
|
|
|
|
QLIST_HEAD(, AioHandler) aio_handlers;
|
|
|
|
|
|
|
|
/* This is a simple lock used to protect the aio_handlers list.
|
|
|
|
* Specifically, it's used to ensure that no callbacks are removed while
|
|
|
|
* we're walking and dispatching callbacks.
|
|
|
|
*/
|
|
|
|
int walking_handlers;
|
|
|
|
|
2014-07-07 21:18:04 +08:00
|
|
|
/* Used to avoid unnecessary event_notifier_set calls in aio_notify.
|
|
|
|
* Writes protected by lock or BQL, reads are lockless.
|
|
|
|
*/
|
|
|
|
bool dispatching;
|
|
|
|
|
2013-07-16 12:28:58 +08:00
|
|
|
/* lock to protect between bh's adders and deleter */
|
|
|
|
QemuMutex bh_lock;
|
2014-07-07 21:18:04 +08:00
|
|
|
|
2012-10-30 06:45:23 +08:00
|
|
|
/* Anchor of the list of Bottom Halves belonging to the context */
|
|
|
|
struct QEMUBH *first_bh;
|
|
|
|
|
|
|
|
/* A simple lock used to protect the first_bh list, and ensure that
|
|
|
|
* no callbacks are removed while we're walking and dispatching callbacks.
|
|
|
|
*/
|
|
|
|
int walking_bh;
|
2012-09-25 00:44:14 +08:00
|
|
|
|
|
|
|
/* Used for aio_notify. */
|
|
|
|
EventNotifier notifier;
|
2013-02-20 18:28:32 +08:00
|
|
|
|
|
|
|
/* GPollFDs for aio_poll() */
|
|
|
|
GArray *pollfds;
|
2013-03-07 20:41:47 +08:00
|
|
|
|
|
|
|
/* Thread pool for performing work and receiving completion callbacks */
|
|
|
|
struct ThreadPool *thread_pool;
|
2013-08-21 23:02:49 +08:00
|
|
|
|
|
|
|
/* TimerLists for calling timers - one per clock type */
|
|
|
|
QEMUTimerListGroup tlg;
|
2013-08-21 23:02:47 +08:00
|
|
|
};
|
2012-10-30 06:45:23 +08:00
|
|
|
|
2014-07-07 21:18:04 +08:00
|
|
|
/* Used internally to synchronize aio_poll against qemu_bh_schedule. */
|
|
|
|
void aio_set_dispatching(AioContext *ctx, bool dispatching);
|
|
|
|
|
2012-10-30 06:45:23 +08:00
|
|
|
/**
|
|
|
|
* aio_context_new: Allocate a new AioContext.
|
|
|
|
*
|
|
|
|
* AioContexts provide a mini event-loop that can be waited on synchronously.
|
|
|
|
* They also provide bottom halves, a service to execute a piece of code
|
|
|
|
* as soon as possible.
|
|
|
|
*/
|
|
|
|
AioContext *aio_context_new(void);
|
|
|
|
|
2012-09-24 20:57:41 +08:00
|
|
|
/**
|
|
|
|
* aio_context_ref:
|
|
|
|
* @ctx: The AioContext to operate on.
|
|
|
|
*
|
|
|
|
* Add a reference to an AioContext.
|
|
|
|
*/
|
|
|
|
void aio_context_ref(AioContext *ctx);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* aio_context_unref:
|
|
|
|
* @ctx: The AioContext to operate on.
|
|
|
|
*
|
|
|
|
* Drop a reference to an AioContext.
|
|
|
|
*/
|
|
|
|
void aio_context_unref(AioContext *ctx);
|
|
|
|
|
2014-03-03 18:30:04 +08:00
|
|
|
/* Take ownership of the AioContext. If the AioContext will be shared between
|
|
|
|
* threads, a thread must have ownership when calling aio_poll().
|
|
|
|
*
|
|
|
|
* Note that multiple threads calling aio_poll() means timers, BHs, and
|
|
|
|
* callbacks may be invoked from a different thread than they were registered
|
|
|
|
* from. Therefore, code must use AioContext acquire/release or use
|
|
|
|
* fine-grained synchronization to protect shared state if other threads will
|
|
|
|
* be accessing it simultaneously.
|
|
|
|
*/
|
|
|
|
void aio_context_acquire(AioContext *ctx);
|
|
|
|
|
|
|
|
/* Relinquish ownership of the AioContext. */
|
|
|
|
void aio_context_release(AioContext *ctx);
|
|
|
|
|
2012-10-30 06:45:23 +08:00
|
|
|
/**
|
|
|
|
* aio_bh_new: Allocate a new bottom half structure.
|
|
|
|
*
|
|
|
|
* Bottom halves are lightweight callbacks whose invocation is guaranteed
|
|
|
|
* to be wait-free, thread-safe and signal-safe. The #QEMUBH structure
|
|
|
|
* is opaque and must be allocated prior to its use.
|
|
|
|
*/
|
|
|
|
QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
|
|
|
|
|
2012-09-25 00:44:14 +08:00
|
|
|
/**
|
|
|
|
* aio_notify: Force processing of pending events.
|
|
|
|
*
|
|
|
|
* Similar to signaling a condition variable, aio_notify forces
|
|
|
|
* aio_poll to exit, so that the next call will re-examine pending events.
|
|
|
|
* The caller of aio_notify will usually call aio_poll again very soon,
|
|
|
|
* or go through another iteration of the GLib main loop. Hence, aio_notify
|
|
|
|
* also has the side effect of recalculating the sets of file descriptors
|
|
|
|
* that the main loop waits for.
|
|
|
|
*
|
|
|
|
* Calling aio_notify is rarely necessary, because for example scheduling
|
|
|
|
* a bottom half calls it already.
|
|
|
|
*/
|
|
|
|
void aio_notify(AioContext *ctx);
|
|
|
|
|
2012-10-30 06:45:23 +08:00
|
|
|
/**
|
|
|
|
* aio_bh_poll: Poll bottom halves for an AioContext.
|
|
|
|
*
|
|
|
|
* These are internal functions used by the QEMU main loop.
|
2013-07-16 12:28:58 +08:00
|
|
|
* And notice that multiple occurrences of aio_bh_poll cannot
|
|
|
|
* be called concurrently
|
2012-10-30 06:45:23 +08:00
|
|
|
*/
|
|
|
|
int aio_bh_poll(AioContext *ctx);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* qemu_bh_schedule: Schedule a bottom half.
|
|
|
|
*
|
|
|
|
* Scheduling a bottom half interrupts the main loop and causes the
|
|
|
|
* execution of the callback that was passed to qemu_bh_new.
|
|
|
|
*
|
|
|
|
* Bottom halves that are scheduled from a bottom half handler are instantly
|
|
|
|
* invoked. This can create an infinite loop if a bottom half handler
|
|
|
|
* schedules itself.
|
|
|
|
*
|
|
|
|
* @bh: The bottom half to be scheduled.
|
|
|
|
*/
|
|
|
|
void qemu_bh_schedule(QEMUBH *bh);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* qemu_bh_cancel: Cancel execution of a bottom half.
|
|
|
|
*
|
|
|
|
* Canceling execution of a bottom half undoes the effect of calls to
|
|
|
|
* qemu_bh_schedule without freeing its resources yet. While cancellation
|
|
|
|
* itself is also wait-free and thread-safe, it can of course race with the
|
|
|
|
* loop that executes bottom halves unless you are holding the iothread
|
|
|
|
* mutex. This makes it mostly useless if you are not holding the mutex.
|
|
|
|
*
|
|
|
|
* @bh: The bottom half to be canceled.
|
|
|
|
*/
|
|
|
|
void qemu_bh_cancel(QEMUBH *bh);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* qemu_bh_delete: Cancel execution of a bottom half and free its resources.
|
|
|
|
*
|
|
|
|
* Deleting a bottom half frees the memory that was allocated for it by
|
|
|
|
* qemu_bh_new. It also implies canceling the bottom half if it was
|
|
|
|
* scheduled.
|
2013-07-16 12:28:58 +08:00
|
|
|
* This function is asynchronous. The bottom half is actually deleted at the very
|
|
|
|
* end.
|
2012-10-30 06:45:23 +08:00
|
|
|
*
|
|
|
|
* @bh: The bottom half to be deleted.
|
|
|
|
*/
|
|
|
|
void qemu_bh_delete(QEMUBH *bh);
|
|
|
|
|
2012-09-24 20:57:22 +08:00
|
|
|
/* Return whether there are any pending callbacks from the GSource
|
2014-07-09 17:53:08 +08:00
|
|
|
* attached to the AioContext, before g_poll is invoked.
|
|
|
|
*
|
|
|
|
* This is used internally in the implementation of the GSource.
|
|
|
|
*/
|
|
|
|
bool aio_prepare(AioContext *ctx);
|
|
|
|
|
|
|
|
/* Return whether there are any pending callbacks from the GSource
|
|
|
|
* attached to the AioContext, after g_poll is invoked.
|
2012-09-24 20:57:22 +08:00
|
|
|
*
|
|
|
|
* This is used internally in the implementation of the GSource.
|
|
|
|
*/
|
|
|
|
bool aio_pending(AioContext *ctx);
|
|
|
|
|
2014-07-09 17:53:05 +08:00
|
|
|
/* Dispatch any pending callbacks from the GSource attached to the AioContext.
|
|
|
|
*
|
|
|
|
* This is used internally in the implementation of the GSource.
|
|
|
|
*/
|
|
|
|
bool aio_dispatch(AioContext *ctx);
|
|
|
|
|
2012-09-24 20:37:53 +08:00
|
|
|
/* Make progress in completing AIO work. This can issue new pending
|
|
|
|
* aio as a result of executing I/O completion or bh callbacks.
|
2012-04-12 20:00:55 +08:00
|
|
|
*
|
AioContext: do not rely on aio_poll(ctx, true) result to end a loop
Currently, whenever aio_poll(ctx, true) has completed all pending
work it returns true *and* the next call to aio_poll(ctx, true)
will not block.
This invariant has its roots in qemu_aio_flush()'s implementation
as "while (qemu_aio_wait()) {}". However, qemu_aio_flush() does
not exist anymore and bdrv_drain_all() is implemented differently;
and this invariant is complicated to maintain and subtly different
from the return value of GMainLoop's g_main_context_iteration.
All calls to aio_poll(ctx, true) except one are guarded by a
while() loop checking for a request to be incomplete, or a
BlockDriverState to be idle. The one remaining call (in
iothread.c) uses this to delay the aio_context_release/acquire
pair until the AioContext is quiescent, however:
- we can do the same just by using non-blocking aio_poll,
similar to how vl.c invokes main_loop_wait
- it is buggy, because it does not ensure that the AioContext
is released between an aio_notify and the next time the
iothread goes to sleep. This leads to hangs when stopping
the dataplane thread.
In the end, these semantics are a bad match for the current
users of AioContext. So modify that one exception in iothread.c,
which also fixes the hangs, as well as the testcase so that
it use the same idiom as the actual QEMU code.
Reported-by: Christian Borntraeger <borntraeger@de.ibm.com>
Tested-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2014-07-09 16:49:46 +08:00
|
|
|
* Return whether any progress was made by executing AIO or bottom half
|
|
|
|
* handlers. If @blocking == true, this should always be true except
|
|
|
|
* if someone called aio_notify.
|
2012-09-24 20:37:53 +08:00
|
|
|
*
|
|
|
|
* If there are no pending bottom halves, but there are pending AIO
|
|
|
|
* operations, it may not be possible to make any progress without
|
|
|
|
* blocking. If @blocking is true, this function will wait until one
|
|
|
|
* or more AIO events have completed, to ensure something has moved
|
|
|
|
* before returning.
|
|
|
|
*/
|
|
|
|
bool aio_poll(AioContext *ctx, bool blocking);
|
2008-09-23 03:17:18 +08:00
|
|
|
|
|
|
|
/* Register a file descriptor and associated callbacks. Behaves very similarly
|
|
|
|
* to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will
|
2014-07-07 21:18:02 +08:00
|
|
|
* be invoked when using aio_poll().
|
2008-09-23 03:17:18 +08:00
|
|
|
*
|
|
|
|
* Code that invokes AIO completion functions should rely on this function
|
|
|
|
* instead of qemu_set_fd_handler[2].
|
|
|
|
*/
|
2012-09-13 18:28:51 +08:00
|
|
|
void aio_set_fd_handler(AioContext *ctx,
|
|
|
|
int fd,
|
|
|
|
IOHandler *io_read,
|
|
|
|
IOHandler *io_write,
|
|
|
|
void *opaque);
|
2012-06-09 09:44:00 +08:00
|
|
|
|
|
|
|
/* Register an event notifier and associated callbacks. Behaves very similarly
|
|
|
|
* to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks
|
2014-07-07 21:18:02 +08:00
|
|
|
* will be invoked when using aio_poll().
|
2012-06-09 09:44:00 +08:00
|
|
|
*
|
|
|
|
* Code that invokes AIO completion functions should rely on this function
|
|
|
|
* instead of event_notifier_set_handler.
|
|
|
|
*/
|
2012-09-13 18:28:51 +08:00
|
|
|
void aio_set_event_notifier(AioContext *ctx,
|
|
|
|
EventNotifier *notifier,
|
2013-04-11 23:26:25 +08:00
|
|
|
EventNotifierHandler *io_read);
|
2012-09-13 18:28:51 +08:00
|
|
|
|
2012-09-24 20:57:41 +08:00
|
|
|
/* Return a GSource that lets the main loop poll the file descriptors attached
|
|
|
|
* to this AioContext.
|
|
|
|
*/
|
|
|
|
GSource *aio_get_g_source(AioContext *ctx);
|
|
|
|
|
2013-03-07 20:41:47 +08:00
|
|
|
/* Return the ThreadPool bound to this AioContext */
|
|
|
|
struct ThreadPool *aio_get_thread_pool(AioContext *ctx);
|
|
|
|
|
2013-08-21 23:02:52 +08:00
|
|
|
/**
|
|
|
|
* aio_timer_new:
|
|
|
|
* @ctx: the aio context
|
|
|
|
* @type: the clock type
|
|
|
|
* @scale: the scale
|
|
|
|
* @cb: the callback to call on timer expiry
|
|
|
|
* @opaque: the opaque pointer to pass to the callback
|
|
|
|
*
|
|
|
|
* Allocate a new timer attached to the context @ctx.
|
|
|
|
* The function is responsible for memory allocation.
|
|
|
|
*
|
|
|
|
* The preferred interface is aio_timer_init. Use that
|
|
|
|
* unless you really need dynamic memory allocation.
|
|
|
|
*
|
|
|
|
* Returns: a pointer to the new timer
|
|
|
|
*/
|
|
|
|
static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type,
|
|
|
|
int scale,
|
|
|
|
QEMUTimerCB *cb, void *opaque)
|
|
|
|
{
|
|
|
|
return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* aio_timer_init:
|
|
|
|
* @ctx: the aio context
|
|
|
|
* @ts: the timer
|
|
|
|
* @type: the clock type
|
|
|
|
* @scale: the scale
|
|
|
|
* @cb: the callback to call on timer expiry
|
|
|
|
* @opaque: the opaque pointer to pass to the callback
|
|
|
|
*
|
|
|
|
* Initialise a new timer attached to the context @ctx.
|
|
|
|
* The caller is responsible for memory allocation.
|
|
|
|
*/
|
|
|
|
static inline void aio_timer_init(AioContext *ctx,
|
|
|
|
QEMUTimer *ts, QEMUClockType type,
|
|
|
|
int scale,
|
|
|
|
QEMUTimerCB *cb, void *opaque)
|
|
|
|
{
|
|
|
|
timer_init(ts, ctx->tlg.tl[type], scale, cb, opaque);
|
|
|
|
}
|
|
|
|
|
2014-07-09 17:53:01 +08:00
|
|
|
/**
|
|
|
|
* aio_compute_timeout:
|
|
|
|
* @ctx: the aio context
|
|
|
|
*
|
|
|
|
* Compute the timeout that a blocking aio_poll should use.
|
|
|
|
*/
|
|
|
|
int64_t aio_compute_timeout(AioContext *ctx);
|
|
|
|
|
2008-09-23 03:17:18 +08:00
|
|
|
#endif
|