mirror of https://gitee.com/openkylin/qemu.git
Bugfixes, and reworking of the atomics documentation.
-----BEGIN PGP SIGNATURE----- iQFIBAABCAAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAl6UDRYUHHBib256aW5p QHJlZGhhdC5jb20ACgkQv/vSX3jHroOSGggAgy/pnlhh5NjGc0PZLhz09O1MlOiT iS8/RCudLR/yDJ0K7pweWKc1lGrS11G1n1P+58G6sK7al4NOdlMMgtk1VtZAlMJ4 dSQ+DGV7JaoPztu5ec2V7LiJmhyxrVaKx7xg9JGx0bZ/1wCC1GqZUlZ2hYdgQ8L4 EchdwqzRd2sznlUVAP19ZcPb6sYG2VlkIzFytd5p3xZqrr0g3RJa7nmWRWAnEx1L 5/13U6g2PEU3jFKTtOcELFq8F/tB8id+fwIE2GB3glKzBHXnJSAfpzBV3/8L72xV JqSUa62O12qGX5k5F9BJPgcfxs40wyEkWTBJuW+WQvsmI73EJ3B30gjkhw== =8Dsv -----END PGP SIGNATURE----- Merge remote-tracking branch 'remotes/bonzini/tags/for-upstream' into staging Bugfixes, and reworking of the atomics documentation. # gpg: Signature made Mon 13 Apr 2020 07:56:22 BST # gpg: using RSA key F13338574B662389866C7682BFFBD25F78C7AE83 # gpg: issuer "pbonzini@redhat.com" # gpg: Good signature from "Paolo Bonzini <bonzini@gnu.org>" [full] # gpg: aka "Paolo Bonzini <pbonzini@redhat.com>" [full] # Primary key fingerprint: 46F5 9FBD 57D6 12E7 BFD4 E2F7 7E15 100C CD36 69B1 # Subkey fingerprint: F133 3857 4B66 2389 866C 7682 BFFB D25F 78C7 AE83 * remotes/bonzini/tags/for-upstream: module: increase dirs array size by one memory: Do not allow direct write access to rom_device regions vl.c: error out if -mem-path is used together with -M memory-backend rcu: do not mention atomic_mb_read/set in documentation atomics: update documentation atomics: convert to reStructuredText oslib-posix: take lock before qemu_cond_broadcast piix: fix xenfv regression, add compat machine xenfv-4.2 Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
commit
e33d61cc9a
|
@ -0,0 +1,507 @@
|
|||
=========================
|
||||
Atomic operations in QEMU
|
||||
=========================
|
||||
|
||||
CPUs perform independent memory operations effectively in random order,
|
||||
but this can be a problem for CPU-CPU interaction (including interactions
|
||||
between QEMU and the guest). Multi-threaded programs use various tools
|
||||
to instruct the compiler and the CPU to restrict the order to something
|
||||
that is consistent with the expectations of the programmer.
|
||||
|
||||
The most basic tool is locking. Mutexes, condition variables and
|
||||
semaphores are used in QEMU, and should be the default approach to
|
||||
synchronization. Anything else is considerably harder, but it's
|
||||
also justified more often than one would like;
|
||||
the most performance-critical parts of QEMU in particular require
|
||||
a very low level approach to concurrency, involving memory barriers
|
||||
and atomic operations. The semantics of concurrent memory accesses are governed
|
||||
by the C11 memory model.
|
||||
|
||||
QEMU provides a header, ``qemu/atomic.h``, which wraps C11 atomics to
|
||||
provide better portability and a less verbose syntax. ``qemu/atomic.h``
|
||||
provides macros that fall in three camps:
|
||||
|
||||
- compiler barriers: ``barrier()``;
|
||||
|
||||
- weak atomic access and manual memory barriers: ``atomic_read()``,
|
||||
``atomic_set()``, ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``, ``smp_mb_acquire()``,
|
||||
``smp_mb_release()``, ``smp_read_barrier_depends()``;
|
||||
|
||||
- sequentially consistent atomic access: everything else.
|
||||
|
||||
In general, use of ``qemu/atomic.h`` should be wrapped with more easily
|
||||
used data structures (e.g. the lock-free singly-linked list operations
|
||||
``QSLIST_INSERT_HEAD_ATOMIC`` and ``QSLIST_MOVE_ATOMIC``) or synchronization
|
||||
primitives (such as RCU, ``QemuEvent`` or ``QemuLockCnt``). Bare use of
|
||||
atomic operations and memory barriers should be limited to inter-thread
|
||||
checking of flags and documented thoroughly.
|
||||
|
||||
|
||||
|
||||
Compiler memory barrier
|
||||
=======================
|
||||
|
||||
``barrier()`` prevents the compiler from moving the memory accesses on
|
||||
either side of it to the other side. The compiler barrier has no direct
|
||||
effect on the CPU, which may then reorder things however it wishes.
|
||||
|
||||
``barrier()`` is mostly used within ``qemu/atomic.h`` itself. On some
|
||||
architectures, CPU guarantees are strong enough that blocking compiler
|
||||
optimizations already ensures the correct order of execution. In this
|
||||
case, ``qemu/atomic.h`` will reduce stronger memory barriers to simple
|
||||
compiler barriers.
|
||||
|
||||
Still, ``barrier()`` can be useful when writing code that can be interrupted
|
||||
by signal handlers.
|
||||
|
||||
|
||||
Sequentially consistent atomic access
|
||||
=====================================
|
||||
|
||||
Most of the operations in the ``qemu/atomic.h`` header ensure *sequential
|
||||
consistency*, where "the result of any execution is the same as if the
|
||||
operations of all the processors were executed in some sequential order,
|
||||
and the operations of each individual processor appear in this sequence
|
||||
in the order specified by its program".
|
||||
|
||||
``qemu/atomic.h`` provides the following set of atomic read-modify-write
|
||||
operations::
|
||||
|
||||
void atomic_inc(ptr)
|
||||
void atomic_dec(ptr)
|
||||
void atomic_add(ptr, val)
|
||||
void atomic_sub(ptr, val)
|
||||
void atomic_and(ptr, val)
|
||||
void atomic_or(ptr, val)
|
||||
|
||||
typeof(*ptr) atomic_fetch_inc(ptr)
|
||||
typeof(*ptr) atomic_fetch_dec(ptr)
|
||||
typeof(*ptr) atomic_fetch_add(ptr, val)
|
||||
typeof(*ptr) atomic_fetch_sub(ptr, val)
|
||||
typeof(*ptr) atomic_fetch_and(ptr, val)
|
||||
typeof(*ptr) atomic_fetch_or(ptr, val)
|
||||
typeof(*ptr) atomic_fetch_xor(ptr, val)
|
||||
typeof(*ptr) atomic_fetch_inc_nonzero(ptr)
|
||||
typeof(*ptr) atomic_xchg(ptr, val)
|
||||
typeof(*ptr) atomic_cmpxchg(ptr, old, new)
|
||||
|
||||
all of which return the old value of ``*ptr``. These operations are
|
||||
polymorphic; they operate on any type that is as wide as a pointer or
|
||||
smaller.
|
||||
|
||||
Similar operations return the new value of ``*ptr``::
|
||||
|
||||
typeof(*ptr) atomic_inc_fetch(ptr)
|
||||
typeof(*ptr) atomic_dec_fetch(ptr)
|
||||
typeof(*ptr) atomic_add_fetch(ptr, val)
|
||||
typeof(*ptr) atomic_sub_fetch(ptr, val)
|
||||
typeof(*ptr) atomic_and_fetch(ptr, val)
|
||||
typeof(*ptr) atomic_or_fetch(ptr, val)
|
||||
typeof(*ptr) atomic_xor_fetch(ptr, val)
|
||||
|
||||
``qemu/atomic.h`` also provides loads and stores that cannot be reordered
|
||||
with each other::
|
||||
|
||||
typeof(*ptr) atomic_mb_read(ptr)
|
||||
void atomic_mb_set(ptr, val)
|
||||
|
||||
However these do not provide sequential consistency and, in particular,
|
||||
they do not participate in the total ordering enforced by
|
||||
sequentially-consistent operations. For this reason they are deprecated.
|
||||
They should instead be replaced with any of the following (ordered from
|
||||
easiest to hardest):
|
||||
|
||||
- accesses inside a mutex or spinlock
|
||||
|
||||
- lightweight synchronization primitives such as ``QemuEvent``
|
||||
|
||||
- RCU operations (``atomic_rcu_read``, ``atomic_rcu_set``) when publishing
|
||||
or accessing a new version of a data structure
|
||||
|
||||
- other atomic accesses: ``atomic_read`` and ``atomic_load_acquire`` for
|
||||
loads, ``atomic_set`` and ``atomic_store_release`` for stores, ``smp_mb``
|
||||
to forbid reordering subsequent loads before a store.
|
||||
|
||||
|
||||
Weak atomic access and manual memory barriers
|
||||
=============================================
|
||||
|
||||
Compared to sequentially consistent atomic access, programming with
|
||||
weaker consistency models can be considerably more complicated.
|
||||
The only guarantees that you can rely upon in this case are:
|
||||
|
||||
- atomic accesses will not cause data races (and hence undefined behavior);
|
||||
ordinary accesses instead cause data races if they are concurrent with
|
||||
other accesses of which at least one is a write. In order to ensure this,
|
||||
the compiler will not optimize accesses out of existence, create unsolicited
|
||||
accesses, or perform other similar optimizations.
|
||||
|
||||
- acquire operations will appear to happen, with respect to the other
|
||||
components of the system, before all the LOAD or STORE operations
|
||||
specified afterwards.
|
||||
|
||||
- release operations will appear to happen, with respect to the other
|
||||
components of the system, after all the LOAD or STORE operations
|
||||
specified before.
|
||||
|
||||
- release operations will *synchronize with* acquire operations;
|
||||
see :ref:`acqrel` for a detailed explanation.
|
||||
|
||||
When using this model, variables are accessed with:
|
||||
|
||||
- ``atomic_read()`` and ``atomic_set()``; these prevent the compiler from
|
||||
optimizing accesses out of existence and creating unsolicited
|
||||
accesses, but do not otherwise impose any ordering on loads and
|
||||
stores: both the compiler and the processor are free to reorder
|
||||
them.
|
||||
|
||||
- ``atomic_load_acquire()``, which guarantees the LOAD to appear to
|
||||
happen, with respect to the other components of the system,
|
||||
before all the LOAD or STORE operations specified afterwards.
|
||||
Operations coming before ``atomic_load_acquire()`` can still be
|
||||
reordered after it.
|
||||
|
||||
- ``atomic_store_release()``, which guarantees the STORE to appear to
|
||||
happen, with respect to the other components of the system,
|
||||
after all the LOAD or STORE operations specified before.
|
||||
Operations coming after ``atomic_store_release()`` can still be
|
||||
reordered before it.
|
||||
|
||||
Restrictions to the ordering of accesses can also be specified
|
||||
using the memory barrier macros: ``smp_rmb()``, ``smp_wmb()``, ``smp_mb()``,
|
||||
``smp_mb_acquire()``, ``smp_mb_release()``, ``smp_read_barrier_depends()``.
|
||||
|
||||
Memory barriers control the order of references to shared memory.
|
||||
They come in six kinds:
|
||||
|
||||
- ``smp_rmb()`` guarantees that all the LOAD operations specified before
|
||||
the barrier will appear to happen before all the LOAD operations
|
||||
specified after the barrier with respect to the other components of
|
||||
the system.
|
||||
|
||||
In other words, ``smp_rmb()`` puts a partial ordering on loads, but is not
|
||||
required to have any effect on stores.
|
||||
|
||||
- ``smp_wmb()`` guarantees that all the STORE operations specified before
|
||||
the barrier will appear to happen before all the STORE operations
|
||||
specified after the barrier with respect to the other components of
|
||||
the system.
|
||||
|
||||
In other words, ``smp_wmb()`` puts a partial ordering on stores, but is not
|
||||
required to have any effect on loads.
|
||||
|
||||
- ``smp_mb_acquire()`` guarantees that all the LOAD operations specified before
|
||||
the barrier will appear to happen before all the LOAD or STORE operations
|
||||
specified after the barrier with respect to the other components of
|
||||
the system.
|
||||
|
||||
- ``smp_mb_release()`` guarantees that all the STORE operations specified *after*
|
||||
the barrier will appear to happen after all the LOAD or STORE operations
|
||||
specified *before* the barrier with respect to the other components of
|
||||
the system.
|
||||
|
||||
- ``smp_mb()`` guarantees that all the LOAD and STORE operations specified
|
||||
before the barrier will appear to happen before all the LOAD and
|
||||
STORE operations specified after the barrier with respect to the other
|
||||
components of the system.
|
||||
|
||||
``smp_mb()`` puts a partial ordering on both loads and stores. It is
|
||||
stronger than both a read and a write memory barrier; it implies both
|
||||
``smp_mb_acquire()`` and ``smp_mb_release()``, but it also prevents STOREs
|
||||
coming before the barrier from overtaking LOADs coming after the
|
||||
barrier and vice versa.
|
||||
|
||||
- ``smp_read_barrier_depends()`` is a weaker kind of read barrier. On
|
||||
most processors, whenever two loads are performed such that the
|
||||
second depends on the result of the first (e.g., the first load
|
||||
retrieves the address to which the second load will be directed),
|
||||
the processor will guarantee that the first LOAD will appear to happen
|
||||
before the second with respect to the other components of the system.
|
||||
However, this is not always true---for example, it was not true on
|
||||
Alpha processors. Whenever this kind of access happens to shared
|
||||
memory (that is not protected by a lock), a read barrier is needed,
|
||||
and ``smp_read_barrier_depends()`` can be used instead of ``smp_rmb()``.
|
||||
|
||||
Note that the first load really has to have a *data* dependency and not
|
||||
a control dependency. If the address for the second load is dependent
|
||||
on the first load, but the dependency is through a conditional rather
|
||||
than actually loading the address itself, then it's a *control*
|
||||
dependency and a full read barrier or better is required.
|
||||
|
||||
|
||||
Memory barriers and ``atomic_load_acquire``/``atomic_store_release`` are
|
||||
mostly used when a data structure has one thread that is always a writer
|
||||
and one thread that is always a reader:
|
||||
|
||||
+----------------------------------+----------------------------------+
|
||||
| thread 1 | thread 2 |
|
||||
+==================================+==================================+
|
||||
| :: | :: |
|
||||
| | |
|
||||
| atomic_store_release(&a, x); | y = atomic_load_acquire(&b); |
|
||||
| atomic_store_release(&b, y); | x = atomic_load_acquire(&a); |
|
||||
+----------------------------------+----------------------------------+
|
||||
|
||||
In this case, correctness is easy to check for using the "pairing"
|
||||
trick that is explained below.
|
||||
|
||||
Sometimes, a thread is accessing many variables that are otherwise
|
||||
unrelated to each other (for example because, apart from the current
|
||||
thread, exactly one other thread will read or write each of these
|
||||
variables). In this case, it is possible to "hoist" the barriers
|
||||
outside a loop. For example:
|
||||
|
||||
+------------------------------------------+----------------------------------+
|
||||
| before | after |
|
||||
+==========================================+==================================+
|
||||
| :: | :: |
|
||||
| | |
|
||||
| n = 0; | n = 0; |
|
||||
| for (i = 0; i < 10; i++) | for (i = 0; i < 10; i++) |
|
||||
| n += atomic_load_acquire(&a[i]); | n += atomic_read(&a[i]); |
|
||||
| | smp_mb_acquire(); |
|
||||
+------------------------------------------+----------------------------------+
|
||||
| :: | :: |
|
||||
| | |
|
||||
| | smp_mb_release(); |
|
||||
| for (i = 0; i < 10; i++) | for (i = 0; i < 10; i++) |
|
||||
| atomic_store_release(&a[i], false); | atomic_set(&a[i], false); |
|
||||
+------------------------------------------+----------------------------------+
|
||||
|
||||
Splitting a loop can also be useful to reduce the number of barriers:
|
||||
|
||||
+------------------------------------------+----------------------------------+
|
||||
| before | after |
|
||||
+==========================================+==================================+
|
||||
| :: | :: |
|
||||
| | |
|
||||
| n = 0; | smp_mb_release(); |
|
||||
| for (i = 0; i < 10; i++) { | for (i = 0; i < 10; i++) |
|
||||
| atomic_store_release(&a[i], false); | atomic_set(&a[i], false); |
|
||||
| smp_mb(); | smp_mb(); |
|
||||
| n += atomic_read(&b[i]); | n = 0; |
|
||||
| } | for (i = 0; i < 10; i++) |
|
||||
| | n += atomic_read(&b[i]); |
|
||||
+------------------------------------------+----------------------------------+
|
||||
|
||||
In this case, a ``smp_mb_release()`` is also replaced with a (possibly cheaper, and clearer
|
||||
as well) ``smp_wmb()``:
|
||||
|
||||
+------------------------------------------+----------------------------------+
|
||||
| before | after |
|
||||
+==========================================+==================================+
|
||||
| :: | :: |
|
||||
| | |
|
||||
| | smp_mb_release(); |
|
||||
| for (i = 0; i < 10; i++) { | for (i = 0; i < 10; i++) |
|
||||
| atomic_store_release(&a[i], false); | atomic_set(&a[i], false); |
|
||||
| atomic_store_release(&b[i], false); | smp_wmb(); |
|
||||
| } | for (i = 0; i < 10; i++) |
|
||||
| | atomic_set(&b[i], false); |
|
||||
+------------------------------------------+----------------------------------+
|
||||
|
||||
|
||||
.. _acqrel:
|
||||
|
||||
Acquire/release pairing and the *synchronizes-with* relation
|
||||
------------------------------------------------------------
|
||||
|
||||
Atomic operations other than ``atomic_set()`` and ``atomic_read()`` have
|
||||
either *acquire* or *release* semantics [#rmw]_. This has two effects:
|
||||
|
||||
.. [#rmw] Read-modify-write operations can have both---acquire applies to the
|
||||
read part, and release to the write.
|
||||
|
||||
- within a thread, they are ordered either before subsequent operations
|
||||
(for acquire) or after previous operations (for release).
|
||||
|
||||
- if a release operation in one thread *synchronizes with* an acquire operation
|
||||
in another thread, the ordering constraints propagates from the first to the
|
||||
second thread. That is, everything before the release operation in the
|
||||
first thread is guaranteed to *happen before* everything after the
|
||||
acquire operation in the second thread.
|
||||
|
||||
The concept of acquire and release semantics is not exclusive to atomic
|
||||
operations; almost all higher-level synchronization primitives also have
|
||||
acquire or release semantics. For example:
|
||||
|
||||
- ``pthread_mutex_lock`` has acquire semantics, ``pthread_mutex_unlock`` has
|
||||
release semantics and synchronizes with a ``pthread_mutex_lock`` for the
|
||||
same mutex.
|
||||
|
||||
- ``pthread_cond_signal`` and ``pthread_cond_broadcast`` have release semantics;
|
||||
``pthread_cond_wait`` has both release semantics (synchronizing with
|
||||
``pthread_mutex_lock``) and acquire semantics (synchronizing with
|
||||
``pthread_mutex_unlock`` and signaling of the condition variable).
|
||||
|
||||
- ``pthread_create`` has release semantics and synchronizes with the start
|
||||
of the new thread; ``pthread_join`` has acquire semantics and synchronizes
|
||||
with the exiting of the thread.
|
||||
|
||||
- ``qemu_event_set`` has release semantics, ``qemu_event_wait`` has
|
||||
acquire semantics.
|
||||
|
||||
For example, in the following example there are no atomic accesses, but still
|
||||
thread 2 is relying on the *synchronizes-with* relation between ``pthread_exit``
|
||||
(release) and ``pthread_join`` (acquire):
|
||||
|
||||
+----------------------+-------------------------------+
|
||||
| thread 1 | thread 2 |
|
||||
+======================+===============================+
|
||||
| :: | :: |
|
||||
| | |
|
||||
| *a = 1; | |
|
||||
| pthread_exit(a); | pthread_join(thread1, &a); |
|
||||
| | x = *a; |
|
||||
+----------------------+-------------------------------+
|
||||
|
||||
Synchronization between threads basically descends from this pairing of
|
||||
a release operation and an acquire operation. Therefore, atomic operations
|
||||
other than ``atomic_set()`` and ``atomic_read()`` will almost always be
|
||||
paired with another operation of the opposite kind: an acquire operation
|
||||
will pair with a release operation and vice versa. This rule of thumb is
|
||||
extremely useful; in the case of QEMU, however, note that the other
|
||||
operation may actually be in a driver that runs in the guest!
|
||||
|
||||
``smp_read_barrier_depends()``, ``smp_rmb()``, ``smp_mb_acquire()``,
|
||||
``atomic_load_acquire()`` and ``atomic_rcu_read()`` all count
|
||||
as acquire operations. ``smp_wmb()``, ``smp_mb_release()``,
|
||||
``atomic_store_release()`` and ``atomic_rcu_set()`` all count as release
|
||||
operations. ``smp_mb()`` counts as both acquire and release, therefore
|
||||
it can pair with any other atomic operation. Here is an example:
|
||||
|
||||
+----------------------+------------------------------+
|
||||
| thread 1 | thread 2 |
|
||||
+======================+==============================+
|
||||
| :: | :: |
|
||||
| | |
|
||||
| atomic_set(&a, 1); | |
|
||||
| smp_wmb(); | |
|
||||
| atomic_set(&b, 2); | x = atomic_read(&b); |
|
||||
| | smp_rmb(); |
|
||||
| | y = atomic_read(&a); |
|
||||
+----------------------+------------------------------+
|
||||
|
||||
Note that a load-store pair only counts if the two operations access the
|
||||
same variable: that is, a store-release on a variable ``x`` *synchronizes
|
||||
with* a load-acquire on a variable ``x``, while a release barrier
|
||||
synchronizes with any acquire operation. The following example shows
|
||||
correct synchronization:
|
||||
|
||||
+--------------------------------+--------------------------------+
|
||||
| thread 1 | thread 2 |
|
||||
+================================+================================+
|
||||
| :: | :: |
|
||||
| | |
|
||||
| atomic_set(&a, 1); | |
|
||||
| atomic_store_release(&b, 2); | x = atomic_load_acquire(&b); |
|
||||
| | y = atomic_read(&a); |
|
||||
+--------------------------------+--------------------------------+
|
||||
|
||||
Acquire and release semantics of higher-level primitives can also be
|
||||
relied upon for the purpose of establishing the *synchronizes with*
|
||||
relation.
|
||||
|
||||
Note that the "writing" thread is accessing the variables in the
|
||||
opposite order as the "reading" thread. This is expected: stores
|
||||
before a release operation will normally match the loads after
|
||||
the acquire operation, and vice versa. In fact, this happened already
|
||||
in the ``pthread_exit``/``pthread_join`` example above.
|
||||
|
||||
Finally, this more complex example has more than two accesses and data
|
||||
dependency barriers. It also does not use atomic accesses whenever there
|
||||
cannot be a data race:
|
||||
|
||||
+----------------------+------------------------------+
|
||||
| thread 1 | thread 2 |
|
||||
+======================+==============================+
|
||||
| :: | :: |
|
||||
| | |
|
||||
| b[2] = 1; | |
|
||||
| smp_wmb(); | |
|
||||
| x->i = 2; | |
|
||||
| smp_wmb(); | |
|
||||
| atomic_set(&a, x); | x = atomic_read(&a); |
|
||||
| | smp_read_barrier_depends(); |
|
||||
| | y = x->i; |
|
||||
| | smp_read_barrier_depends(); |
|
||||
| | z = b[y]; |
|
||||
+----------------------+------------------------------+
|
||||
|
||||
Comparison with Linux kernel primitives
|
||||
=======================================
|
||||
|
||||
Here is a list of differences between Linux kernel atomic operations
|
||||
and memory barriers, and the equivalents in QEMU:
|
||||
|
||||
- atomic operations in Linux are always on a 32-bit int type and
|
||||
use a boxed ``atomic_t`` type; atomic operations in QEMU are polymorphic
|
||||
and use normal C types.
|
||||
|
||||
- Originally, ``atomic_read`` and ``atomic_set`` in Linux gave no guarantee
|
||||
at all. Linux 4.1 updated them to implement volatile
|
||||
semantics via ``ACCESS_ONCE`` (or the more recent ``READ``/``WRITE_ONCE``).
|
||||
|
||||
QEMU's ``atomic_read`` and ``atomic_set`` implement C11 atomic relaxed
|
||||
semantics if the compiler supports it, and volatile semantics otherwise.
|
||||
Both semantics prevent the compiler from doing certain transformations;
|
||||
the difference is that atomic accesses are guaranteed to be atomic,
|
||||
while volatile accesses aren't. Thus, in the volatile case we just cross
|
||||
our fingers hoping that the compiler will generate atomic accesses,
|
||||
since we assume the variables passed are machine-word sized and
|
||||
properly aligned.
|
||||
|
||||
No barriers are implied by ``atomic_read`` and ``atomic_set`` in either Linux
|
||||
or QEMU.
|
||||
|
||||
- atomic read-modify-write operations in Linux are of three kinds:
|
||||
|
||||
===================== =========================================
|
||||
``atomic_OP`` returns void
|
||||
``atomic_OP_return`` returns new value of the variable
|
||||
``atomic_fetch_OP`` returns the old value of the variable
|
||||
``atomic_cmpxchg`` returns the old value of the variable
|
||||
===================== =========================================
|
||||
|
||||
In QEMU, the second kind is named ``atomic_OP_fetch``.
|
||||
|
||||
- different atomic read-modify-write operations in Linux imply
|
||||
a different set of memory barriers; in QEMU, all of them enforce
|
||||
sequential consistency.
|
||||
|
||||
- in QEMU, ``atomic_read()`` and ``atomic_set()`` do not participate in
|
||||
the total ordering enforced by sequentially-consistent operations.
|
||||
This is because QEMU uses the C11 memory model. The following example
|
||||
is correct in Linux but not in QEMU:
|
||||
|
||||
+----------------------------------+--------------------------------+
|
||||
| Linux (correct) | QEMU (incorrect) |
|
||||
+==================================+================================+
|
||||
| :: | :: |
|
||||
| | |
|
||||
| a = atomic_fetch_add(&x, 2); | a = atomic_fetch_add(&x, 2); |
|
||||
| b = READ_ONCE(&y); | b = atomic_read(&y); |
|
||||
+----------------------------------+--------------------------------+
|
||||
|
||||
because the read of ``y`` can be moved (by either the processor or the
|
||||
compiler) before the write of ``x``.
|
||||
|
||||
Fixing this requires an ``smp_mb()`` memory barrier between the write
|
||||
of ``x`` and the read of ``y``. In the common case where only one thread
|
||||
writes ``x``, it is also possible to write it like this:
|
||||
|
||||
+--------------------------------+
|
||||
| QEMU (correct) |
|
||||
+================================+
|
||||
| :: |
|
||||
| |
|
||||
| a = atomic_read(&x); |
|
||||
| atomic_set(&x, a + 2); |
|
||||
| smp_mb(); |
|
||||
| b = atomic_read(&y); |
|
||||
+--------------------------------+
|
||||
|
||||
Sources
|
||||
=======
|
||||
|
||||
- ``Documentation/memory-barriers.txt`` from the Linux kernel
|
|
@ -1,403 +0,0 @@
|
|||
CPUs perform independent memory operations effectively in random order.
|
||||
but this can be a problem for CPU-CPU interaction (including interactions
|
||||
between QEMU and the guest). Multi-threaded programs use various tools
|
||||
to instruct the compiler and the CPU to restrict the order to something
|
||||
that is consistent with the expectations of the programmer.
|
||||
|
||||
The most basic tool is locking. Mutexes, condition variables and
|
||||
semaphores are used in QEMU, and should be the default approach to
|
||||
synchronization. Anything else is considerably harder, but it's
|
||||
also justified more often than one would like. The two tools that
|
||||
are provided by qemu/atomic.h are memory barriers and atomic operations.
|
||||
|
||||
Macros defined by qemu/atomic.h fall in three camps:
|
||||
|
||||
- compiler barriers: barrier();
|
||||
|
||||
- weak atomic access and manual memory barriers: atomic_read(),
|
||||
atomic_set(), smp_rmb(), smp_wmb(), smp_mb(), smp_mb_acquire(),
|
||||
smp_mb_release(), smp_read_barrier_depends();
|
||||
|
||||
- sequentially consistent atomic access: everything else.
|
||||
|
||||
|
||||
COMPILER MEMORY BARRIER
|
||||
=======================
|
||||
|
||||
barrier() prevents the compiler from moving the memory accesses either
|
||||
side of it to the other side. The compiler barrier has no direct effect
|
||||
on the CPU, which may then reorder things however it wishes.
|
||||
|
||||
barrier() is mostly used within qemu/atomic.h itself. On some
|
||||
architectures, CPU guarantees are strong enough that blocking compiler
|
||||
optimizations already ensures the correct order of execution. In this
|
||||
case, qemu/atomic.h will reduce stronger memory barriers to simple
|
||||
compiler barriers.
|
||||
|
||||
Still, barrier() can be useful when writing code that can be interrupted
|
||||
by signal handlers.
|
||||
|
||||
|
||||
SEQUENTIALLY CONSISTENT ATOMIC ACCESS
|
||||
=====================================
|
||||
|
||||
Most of the operations in the qemu/atomic.h header ensure *sequential
|
||||
consistency*, where "the result of any execution is the same as if the
|
||||
operations of all the processors were executed in some sequential order,
|
||||
and the operations of each individual processor appear in this sequence
|
||||
in the order specified by its program".
|
||||
|
||||
qemu/atomic.h provides the following set of atomic read-modify-write
|
||||
operations:
|
||||
|
||||
void atomic_inc(ptr)
|
||||
void atomic_dec(ptr)
|
||||
void atomic_add(ptr, val)
|
||||
void atomic_sub(ptr, val)
|
||||
void atomic_and(ptr, val)
|
||||
void atomic_or(ptr, val)
|
||||
|
||||
typeof(*ptr) atomic_fetch_inc(ptr)
|
||||
typeof(*ptr) atomic_fetch_dec(ptr)
|
||||
typeof(*ptr) atomic_fetch_add(ptr, val)
|
||||
typeof(*ptr) atomic_fetch_sub(ptr, val)
|
||||
typeof(*ptr) atomic_fetch_and(ptr, val)
|
||||
typeof(*ptr) atomic_fetch_or(ptr, val)
|
||||
typeof(*ptr) atomic_fetch_xor(ptr, val)
|
||||
typeof(*ptr) atomic_fetch_inc_nonzero(ptr)
|
||||
typeof(*ptr) atomic_xchg(ptr, val)
|
||||
typeof(*ptr) atomic_cmpxchg(ptr, old, new)
|
||||
|
||||
all of which return the old value of *ptr. These operations are
|
||||
polymorphic; they operate on any type that is as wide as a pointer.
|
||||
|
||||
Similar operations return the new value of *ptr:
|
||||
|
||||
typeof(*ptr) atomic_inc_fetch(ptr)
|
||||
typeof(*ptr) atomic_dec_fetch(ptr)
|
||||
typeof(*ptr) atomic_add_fetch(ptr, val)
|
||||
typeof(*ptr) atomic_sub_fetch(ptr, val)
|
||||
typeof(*ptr) atomic_and_fetch(ptr, val)
|
||||
typeof(*ptr) atomic_or_fetch(ptr, val)
|
||||
typeof(*ptr) atomic_xor_fetch(ptr, val)
|
||||
|
||||
Sequentially consistent loads and stores can be done using:
|
||||
|
||||
atomic_fetch_add(ptr, 0) for loads
|
||||
atomic_xchg(ptr, val) for stores
|
||||
|
||||
However, they are quite expensive on some platforms, notably POWER and
|
||||
Arm. Therefore, qemu/atomic.h provides two primitives with slightly
|
||||
weaker constraints:
|
||||
|
||||
typeof(*ptr) atomic_mb_read(ptr)
|
||||
void atomic_mb_set(ptr, val)
|
||||
|
||||
The semantics of these primitives map to Java volatile variables,
|
||||
and are strongly related to memory barriers as used in the Linux
|
||||
kernel (see below).
|
||||
|
||||
As long as you use atomic_mb_read and atomic_mb_set, accesses cannot
|
||||
be reordered with each other, and it is also not possible to reorder
|
||||
"normal" accesses around them.
|
||||
|
||||
However, and this is the important difference between
|
||||
atomic_mb_read/atomic_mb_set and sequential consistency, it is important
|
||||
for both threads to access the same volatile variable. It is not the
|
||||
case that everything visible to thread A when it writes volatile field f
|
||||
becomes visible to thread B after it reads volatile field g. The store
|
||||
and load have to "match" (i.e., be performed on the same volatile
|
||||
field) to achieve the right semantics.
|
||||
|
||||
|
||||
These operations operate on any type that is as wide as an int or smaller.
|
||||
|
||||
|
||||
WEAK ATOMIC ACCESS AND MANUAL MEMORY BARRIERS
|
||||
=============================================
|
||||
|
||||
Compared to sequentially consistent atomic access, programming with
|
||||
weaker consistency models can be considerably more complicated.
|
||||
In general, if the algorithm you are writing includes both writes
|
||||
and reads on the same side, it is generally simpler to use sequentially
|
||||
consistent primitives.
|
||||
|
||||
When using this model, variables are accessed with:
|
||||
|
||||
- atomic_read() and atomic_set(); these prevent the compiler from
|
||||
optimizing accesses out of existence and creating unsolicited
|
||||
accesses, but do not otherwise impose any ordering on loads and
|
||||
stores: both the compiler and the processor are free to reorder
|
||||
them.
|
||||
|
||||
- atomic_load_acquire(), which guarantees the LOAD to appear to
|
||||
happen, with respect to the other components of the system,
|
||||
before all the LOAD or STORE operations specified afterwards.
|
||||
Operations coming before atomic_load_acquire() can still be
|
||||
reordered after it.
|
||||
|
||||
- atomic_store_release(), which guarantees the STORE to appear to
|
||||
happen, with respect to the other components of the system,
|
||||
after all the LOAD or STORE operations specified afterwards.
|
||||
Operations coming after atomic_store_release() can still be
|
||||
reordered after it.
|
||||
|
||||
Restrictions to the ordering of accesses can also be specified
|
||||
using the memory barrier macros: smp_rmb(), smp_wmb(), smp_mb(),
|
||||
smp_mb_acquire(), smp_mb_release(), smp_read_barrier_depends().
|
||||
|
||||
Memory barriers control the order of references to shared memory.
|
||||
They come in six kinds:
|
||||
|
||||
- smp_rmb() guarantees that all the LOAD operations specified before
|
||||
the barrier will appear to happen before all the LOAD operations
|
||||
specified after the barrier with respect to the other components of
|
||||
the system.
|
||||
|
||||
In other words, smp_rmb() puts a partial ordering on loads, but is not
|
||||
required to have any effect on stores.
|
||||
|
||||
- smp_wmb() guarantees that all the STORE operations specified before
|
||||
the barrier will appear to happen before all the STORE operations
|
||||
specified after the barrier with respect to the other components of
|
||||
the system.
|
||||
|
||||
In other words, smp_wmb() puts a partial ordering on stores, but is not
|
||||
required to have any effect on loads.
|
||||
|
||||
- smp_mb_acquire() guarantees that all the LOAD operations specified before
|
||||
the barrier will appear to happen before all the LOAD or STORE operations
|
||||
specified after the barrier with respect to the other components of
|
||||
the system.
|
||||
|
||||
- smp_mb_release() guarantees that all the STORE operations specified *after*
|
||||
the barrier will appear to happen after all the LOAD or STORE operations
|
||||
specified *before* the barrier with respect to the other components of
|
||||
the system.
|
||||
|
||||
- smp_mb() guarantees that all the LOAD and STORE operations specified
|
||||
before the barrier will appear to happen before all the LOAD and
|
||||
STORE operations specified after the barrier with respect to the other
|
||||
components of the system.
|
||||
|
||||
smp_mb() puts a partial ordering on both loads and stores. It is
|
||||
stronger than both a read and a write memory barrier; it implies both
|
||||
smp_mb_acquire() and smp_mb_release(), but it also prevents STOREs
|
||||
coming before the barrier from overtaking LOADs coming after the
|
||||
barrier and vice versa.
|
||||
|
||||
- smp_read_barrier_depends() is a weaker kind of read barrier. On
|
||||
most processors, whenever two loads are performed such that the
|
||||
second depends on the result of the first (e.g., the first load
|
||||
retrieves the address to which the second load will be directed),
|
||||
the processor will guarantee that the first LOAD will appear to happen
|
||||
before the second with respect to the other components of the system.
|
||||
However, this is not always true---for example, it was not true on
|
||||
Alpha processors. Whenever this kind of access happens to shared
|
||||
memory (that is not protected by a lock), a read barrier is needed,
|
||||
and smp_read_barrier_depends() can be used instead of smp_rmb().
|
||||
|
||||
Note that the first load really has to have a _data_ dependency and not
|
||||
a control dependency. If the address for the second load is dependent
|
||||
on the first load, but the dependency is through a conditional rather
|
||||
than actually loading the address itself, then it's a _control_
|
||||
dependency and a full read barrier or better is required.
|
||||
|
||||
|
||||
This is the set of barriers that is required *between* two atomic_read()
|
||||
and atomic_set() operations to achieve sequential consistency:
|
||||
|
||||
| 2nd operation |
|
||||
|-----------------------------------------------|
|
||||
1st operation | (after last) | atomic_read | atomic_set |
|
||||
---------------+----------------+-------------+----------------|
|
||||
(before first) | | none | smp_mb_release |
|
||||
---------------+----------------+-------------+----------------|
|
||||
atomic_read | smp_mb_acquire | smp_rmb | ** |
|
||||
---------------+----------------+-------------+----------------|
|
||||
atomic_set | none | smp_mb()*** | smp_wmb() |
|
||||
---------------+----------------+-------------+----------------|
|
||||
|
||||
* Or smp_read_barrier_depends().
|
||||
|
||||
** This requires a load-store barrier. This is achieved by
|
||||
either smp_mb_acquire() or smp_mb_release().
|
||||
|
||||
*** This requires a store-load barrier. On most machines, the only
|
||||
way to achieve this is a full barrier.
|
||||
|
||||
|
||||
You can see that the two possible definitions of atomic_mb_read()
|
||||
and atomic_mb_set() are the following:
|
||||
|
||||
1) atomic_mb_read(p) = atomic_read(p); smp_mb_acquire()
|
||||
atomic_mb_set(p, v) = smp_mb_release(); atomic_set(p, v); smp_mb()
|
||||
|
||||
2) atomic_mb_read(p) = smp_mb() atomic_read(p); smp_mb_acquire()
|
||||
atomic_mb_set(p, v) = smp_mb_release(); atomic_set(p, v);
|
||||
|
||||
Usually the former is used, because smp_mb() is expensive and a program
|
||||
normally has more reads than writes. Therefore it makes more sense to
|
||||
make atomic_mb_set() the more expensive operation.
|
||||
|
||||
There are two common cases in which atomic_mb_read and atomic_mb_set
|
||||
generate too many memory barriers, and thus it can be useful to manually
|
||||
place barriers, or use atomic_load_acquire/atomic_store_release instead:
|
||||
|
||||
- when a data structure has one thread that is always a writer
|
||||
and one thread that is always a reader, manual placement of
|
||||
memory barriers makes the write side faster. Furthermore,
|
||||
correctness is easy to check for in this case using the "pairing"
|
||||
trick that is explained below:
|
||||
|
||||
thread 1 thread 1
|
||||
------------------------- ------------------------
|
||||
(other writes)
|
||||
atomic_mb_set(&a, x) atomic_store_release(&a, x)
|
||||
atomic_mb_set(&b, y) atomic_store_release(&b, y)
|
||||
|
||||
=>
|
||||
thread 2 thread 2
|
||||
------------------------- ------------------------
|
||||
y = atomic_mb_read(&b) y = atomic_load_acquire(&b)
|
||||
x = atomic_mb_read(&a) x = atomic_load_acquire(&a)
|
||||
(other reads)
|
||||
|
||||
Note that the barrier between the stores in thread 1, and between
|
||||
the loads in thread 2, has been optimized here to a write or a
|
||||
read memory barrier respectively. On some architectures, notably
|
||||
ARMv7, smp_mb_acquire and smp_mb_release are just as expensive as
|
||||
smp_mb, but smp_rmb and/or smp_wmb are more efficient.
|
||||
|
||||
- sometimes, a thread is accessing many variables that are otherwise
|
||||
unrelated to each other (for example because, apart from the current
|
||||
thread, exactly one other thread will read or write each of these
|
||||
variables). In this case, it is possible to "hoist" the implicit
|
||||
barriers provided by atomic_mb_read() and atomic_mb_set() outside
|
||||
a loop. For example, the above definition atomic_mb_read() gives
|
||||
the following transformation:
|
||||
|
||||
n = 0; n = 0;
|
||||
for (i = 0; i < 10; i++) => for (i = 0; i < 10; i++)
|
||||
n += atomic_mb_read(&a[i]); n += atomic_read(&a[i]);
|
||||
smp_mb_acquire();
|
||||
|
||||
Similarly, atomic_mb_set() can be transformed as follows:
|
||||
|
||||
smp_mb_release();
|
||||
for (i = 0; i < 10; i++) => for (i = 0; i < 10; i++)
|
||||
atomic_mb_set(&a[i], false); atomic_set(&a[i], false);
|
||||
smp_mb();
|
||||
|
||||
|
||||
The other thread can still use atomic_mb_read()/atomic_mb_set().
|
||||
|
||||
The two tricks can be combined. In this case, splitting a loop in
|
||||
two lets you hoist the barriers out of the loops _and_ eliminate the
|
||||
expensive smp_mb():
|
||||
|
||||
smp_mb_release();
|
||||
for (i = 0; i < 10; i++) { => for (i = 0; i < 10; i++)
|
||||
atomic_mb_set(&a[i], false); atomic_set(&a[i], false);
|
||||
atomic_mb_set(&b[i], false); smp_wmb();
|
||||
} for (i = 0; i < 10; i++)
|
||||
atomic_set(&b[i], false);
|
||||
smp_mb();
|
||||
|
||||
|
||||
Memory barrier pairing
|
||||
----------------------
|
||||
|
||||
A useful rule of thumb is that memory barriers should always, or almost
|
||||
always, be paired with another barrier. In the case of QEMU, however,
|
||||
note that the other barrier may actually be in a driver that runs in
|
||||
the guest!
|
||||
|
||||
For the purposes of pairing, smp_read_barrier_depends() and smp_rmb()
|
||||
both count as read barriers. A read barrier shall pair with a write
|
||||
barrier or a full barrier; a write barrier shall pair with a read
|
||||
barrier or a full barrier. A full barrier can pair with anything.
|
||||
For example:
|
||||
|
||||
thread 1 thread 2
|
||||
=============== ===============
|
||||
a = 1;
|
||||
smp_wmb();
|
||||
b = 2; x = b;
|
||||
smp_rmb();
|
||||
y = a;
|
||||
|
||||
Note that the "writing" thread is accessing the variables in the
|
||||
opposite order as the "reading" thread. This is expected: stores
|
||||
before the write barrier will normally match the loads after the
|
||||
read barrier, and vice versa. The same is true for more than 2
|
||||
accesses and for data dependency barriers:
|
||||
|
||||
thread 1 thread 2
|
||||
=============== ===============
|
||||
b[2] = 1;
|
||||
smp_wmb();
|
||||
x->i = 2;
|
||||
smp_wmb();
|
||||
a = x; x = a;
|
||||
smp_read_barrier_depends();
|
||||
y = x->i;
|
||||
smp_read_barrier_depends();
|
||||
z = b[y];
|
||||
|
||||
smp_wmb() also pairs with atomic_mb_read() and smp_mb_acquire(),
|
||||
and smp_rmb() also pairs with atomic_mb_set() and smp_mb_release().
|
||||
|
||||
|
||||
COMPARISON WITH LINUX KERNEL MEMORY BARRIERS
|
||||
============================================
|
||||
|
||||
Here is a list of differences between Linux kernel atomic operations
|
||||
and memory barriers, and the equivalents in QEMU:
|
||||
|
||||
- atomic operations in Linux are always on a 32-bit int type and
|
||||
use a boxed atomic_t type; atomic operations in QEMU are polymorphic
|
||||
and use normal C types.
|
||||
|
||||
- Originally, atomic_read and atomic_set in Linux gave no guarantee
|
||||
at all. Linux 4.1 updated them to implement volatile
|
||||
semantics via ACCESS_ONCE (or the more recent READ/WRITE_ONCE).
|
||||
|
||||
QEMU's atomic_read/set implement, if the compiler supports it, C11
|
||||
atomic relaxed semantics, and volatile semantics otherwise.
|
||||
Both semantics prevent the compiler from doing certain transformations;
|
||||
the difference is that atomic accesses are guaranteed to be atomic,
|
||||
while volatile accesses aren't. Thus, in the volatile case we just cross
|
||||
our fingers hoping that the compiler will generate atomic accesses,
|
||||
since we assume the variables passed are machine-word sized and
|
||||
properly aligned.
|
||||
No barriers are implied by atomic_read/set in either Linux or QEMU.
|
||||
|
||||
- atomic read-modify-write operations in Linux are of three kinds:
|
||||
|
||||
atomic_OP returns void
|
||||
atomic_OP_return returns new value of the variable
|
||||
atomic_fetch_OP returns the old value of the variable
|
||||
atomic_cmpxchg returns the old value of the variable
|
||||
|
||||
In QEMU, the second kind does not exist. Currently Linux has
|
||||
atomic_fetch_or only. QEMU provides and, or, inc, dec, add, sub.
|
||||
|
||||
- different atomic read-modify-write operations in Linux imply
|
||||
a different set of memory barriers; in QEMU, all of them enforce
|
||||
sequential consistency, which means they imply full memory barriers
|
||||
before and after the operation.
|
||||
|
||||
- Linux does not have an equivalent of atomic_mb_set(). In particular,
|
||||
note that smp_store_mb() is a little weaker than atomic_mb_set().
|
||||
atomic_mb_read() compiles to the same instructions as Linux's
|
||||
smp_load_acquire(), but this should be treated as an implementation
|
||||
detail.
|
||||
|
||||
SOURCES
|
||||
=======
|
||||
|
||||
* Documentation/memory-barriers.txt from the Linux kernel
|
||||
|
||||
* "The JSR-133 Cookbook for Compiler Writers", available at
|
||||
http://g.oswego.edu/dl/jmm/cookbook.html
|
|
@ -17,6 +17,7 @@ Contents:
|
|||
loads-stores
|
||||
memory
|
||||
migration
|
||||
atomics
|
||||
stable-process
|
||||
testing
|
||||
decodetree
|
||||
|
|
|
@ -132,7 +132,7 @@ The core RCU API is small:
|
|||
|
||||
typeof(*p) atomic_rcu_read(p);
|
||||
|
||||
atomic_rcu_read() is similar to atomic_mb_read(), but it makes
|
||||
atomic_rcu_read() is similar to atomic_load_acquire(), but it makes
|
||||
some assumptions on the code that calls it. This allows a more
|
||||
optimized implementation.
|
||||
|
||||
|
@ -154,7 +154,7 @@ The core RCU API is small:
|
|||
|
||||
void atomic_rcu_set(p, typeof(*p) v);
|
||||
|
||||
atomic_rcu_set() is also similar to atomic_mb_set(), and it also
|
||||
atomic_rcu_set() is similar to atomic_store_release(), though it also
|
||||
makes assumptions on the code that calls it in order to allow a more
|
||||
optimized implementation.
|
||||
|
||||
|
|
|
@ -948,13 +948,26 @@ DEFINE_PC_MACHINE(isapc, "isapc", pc_init_isa,
|
|||
|
||||
|
||||
#ifdef CONFIG_XEN
|
||||
static void xenfv_machine_options(MachineClass *m)
|
||||
static void xenfv_4_2_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_i440fx_4_2_machine_options(m);
|
||||
m->desc = "Xen Fully-virtualized PC";
|
||||
m->max_cpus = HVM_MAX_VCPUS;
|
||||
m->default_machine_opts = "accel=xen";
|
||||
}
|
||||
|
||||
DEFINE_PC_MACHINE(xenfv, "xenfv", pc_xen_hvm_init,
|
||||
xenfv_machine_options);
|
||||
DEFINE_PC_MACHINE(xenfv_4_2, "xenfv-4.2", pc_xen_hvm_init,
|
||||
xenfv_4_2_machine_options);
|
||||
|
||||
static void xenfv_3_1_machine_options(MachineClass *m)
|
||||
{
|
||||
pc_i440fx_3_1_machine_options(m);
|
||||
m->desc = "Xen Fully-virtualized PC";
|
||||
m->alias = "xenfv";
|
||||
m->max_cpus = HVM_MAX_VCPUS;
|
||||
m->default_machine_opts = "accel=xen";
|
||||
}
|
||||
|
||||
DEFINE_PC_MACHINE(xenfv, "xenfv-3.1", pc_xen_hvm_init,
|
||||
xenfv_3_1_machine_options);
|
||||
#endif
|
||||
|
|
|
@ -2351,8 +2351,8 @@ void address_space_write_cached_slow(MemoryRegionCache *cache,
|
|||
static inline bool memory_access_is_direct(MemoryRegion *mr, bool is_write)
|
||||
{
|
||||
if (is_write) {
|
||||
return memory_region_is_ram(mr) &&
|
||||
!mr->readonly && !memory_region_is_ram_device(mr);
|
||||
return memory_region_is_ram(mr) && !mr->readonly &&
|
||||
!mr->rom_device && !memory_region_is_ram_device(mr);
|
||||
} else {
|
||||
return (memory_region_is_ram(mr) && !memory_region_is_ram_device(mr)) ||
|
||||
memory_region_is_romd(mr);
|
||||
|
|
|
@ -4315,6 +4315,11 @@ void qemu_init(int argc, char **argv, char **envp)
|
|||
"explicitly specified 'memory-backend' property");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (mem_path) {
|
||||
error_report("'-mem-path' can't be used together with"
|
||||
"'-machine memory-backend'");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
ram_size = backend_size;
|
||||
}
|
||||
|
||||
|
|
|
@ -288,7 +288,7 @@ static void add_machine_test_case(const char *mname)
|
|||
char *path, *args;
|
||||
|
||||
/* Ignore blacklisted machines */
|
||||
if (g_str_equal("xenfv", mname) || g_str_equal("xenpv", mname)) {
|
||||
if (!memcmp("xenfv", mname, 5) || g_str_equal("xenpv", mname)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -15,35 +15,6 @@
|
|||
#include "qemu/cutils.h"
|
||||
#include "libqtest.h"
|
||||
|
||||
static const char *blacklist_x86[] = {
|
||||
"xenfv", "xenpv", NULL
|
||||
};
|
||||
|
||||
static const struct {
|
||||
const char *arch;
|
||||
const char **machine;
|
||||
} blacklists[] = {
|
||||
{ "i386", blacklist_x86 },
|
||||
{ "x86_64", blacklist_x86 },
|
||||
};
|
||||
|
||||
static bool is_blacklisted(const char *arch, const char *mach)
|
||||
{
|
||||
int i;
|
||||
const char **p;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(blacklists); i++) {
|
||||
if (!strcmp(blacklists[i].arch, arch)) {
|
||||
for (p = blacklists[i].machine; *p; p++) {
|
||||
if (!strcmp(*p, mach)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static void test_properties(QTestState *qts, const char *path, bool recurse)
|
||||
{
|
||||
char *child_path;
|
||||
|
@ -108,13 +79,16 @@ static void test_machine(gconstpointer data)
|
|||
|
||||
static void add_machine_test_case(const char *mname)
|
||||
{
|
||||
const char *arch = qtest_get_arch();
|
||||
char *path;
|
||||
|
||||
if (!is_blacklisted(arch, mname)) {
|
||||
char *path = g_strdup_printf("qom/%s", mname);
|
||||
qtest_add_data_func(path, g_strdup(mname), test_machine);
|
||||
g_free(path);
|
||||
/* Ignore blacklisted machines that have known problems */
|
||||
if (!memcmp("xenfv", mname, 5) || g_str_equal("xenpv", mname)) {
|
||||
return;
|
||||
}
|
||||
|
||||
path = g_strdup_printf("qom/%s", mname);
|
||||
qtest_add_data_func(path, g_strdup(mname), test_machine);
|
||||
g_free(path);
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
|
|
|
@ -143,7 +143,7 @@ static void add_machine_test_case(const char *mname)
|
|||
char *path;
|
||||
|
||||
/* Ignore blacklisted machines that have known problems */
|
||||
if (!strcmp("xenfv", mname) || !strcmp("xenpv", mname)) {
|
||||
if (!memcmp("xenfv", mname, 5) || g_str_equal("xenpv", mname)) {
|
||||
return;
|
||||
}
|
||||
|
||||
|
|
|
@ -177,7 +177,7 @@ bool module_load_one(const char *prefix, const char *lib_name)
|
|||
char *version_dir;
|
||||
#endif
|
||||
const char *search_dir;
|
||||
char *dirs[4];
|
||||
char *dirs[5];
|
||||
char *module_name;
|
||||
int i = 0, n_dirs = 0;
|
||||
int ret;
|
||||
|
|
|
@ -492,8 +492,11 @@ static bool touch_all_pages(char *area, size_t hpagesize, size_t numpages,
|
|||
QEMU_THREAD_JOINABLE);
|
||||
addr += memset_thread[i].numpages * hpagesize;
|
||||
}
|
||||
|
||||
qemu_mutex_lock(&page_mutex);
|
||||
threads_created_flag = true;
|
||||
qemu_cond_broadcast(&page_cond);
|
||||
qemu_mutex_unlock(&page_mutex);
|
||||
|
||||
for (i = 0; i < memset_num_threads; i++) {
|
||||
qemu_thread_join(&memset_thread[i].pgthread);
|
||||
|
|
Loading…
Reference in New Issue