commit 0bc248f983
am ac322da6: Atomic/SMP update.

@@ -0,0 +1,101 @@
/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_CUTILS_ATOMIC_INLINE_H
#define ANDROID_CUTILS_ATOMIC_INLINE_H

/*
 * Inline declarations and macros for some special-purpose atomic
 * operations. These are intended for rare circumstances where a
 * memory barrier needs to be issued inline rather than as a function
 * call.
 *
 * Most code should not use these.
 *
 * Anything that does include this file must set ANDROID_SMP to either
 * 0 or 1, indicating compilation for UP or SMP, respectively.
 */

#if !defined(ANDROID_SMP)
# error "Must define ANDROID_SMP before including atomic-inline.h"
#endif

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Define the full memory barrier for an SMP system. This is
 * platform-specific.
 */

#ifdef __arm__
#include <machine/cpu-features.h>

/*
 * For ARMv6K we need to issue a specific MCR instead of the DMB, since
 * that wasn't added until v7. For anything older, SMP isn't relevant.
 * Since we don't have an ARMv6K to test with, we're not going to deal
 * with that now.
 *
 * The DMB instruction is found in the ARM and Thumb2 instruction sets.
 * This will fail on plain 16-bit Thumb.
 */
#if defined(__ARM_HAVE_DMB)
# define __android_membar_full_smp() \
    do { __asm__ __volatile__ ("dmb" ::: "memory"); } while (0)
#else
# define __android_membar_full_smp() ARM_SMP_defined_but_no_DMB()
#endif

#elif defined(__i386__) || defined(__x86_64__)
/*
 * For recent x86, we can use the SSE2 mfence instruction.
 */
# define __android_membar_full_smp() \
    do { __asm__ __volatile__ ("mfence" ::: "memory"); } while (0)

#else
/*
 * Implementation not defined for this platform. Hopefully we're building
 * in uniprocessor mode.
 */
# define __android_membar_full_smp() SMP_barrier_not_defined_for_platform()
#endif


/*
 * Full barrier. On uniprocessors this is just a compiler reorder barrier,
 * which ensures that the statements appearing above the barrier in the C/C++
 * code will be issued before the statements appearing below the barrier.
 *
 * For SMP this also includes a memory barrier instruction. On an ARM
 * CPU this means that the current core will flush pending writes, wait
 * for pending reads to complete, and discard any cached reads that could
 * be stale. Other CPUs may do less, but the end result is equivalent.
 */
#if ANDROID_SMP != 0
# define android_membar_full() __android_membar_full_smp()
#else
# define android_membar_full() \
    do { __asm__ __volatile__ ("" ::: "memory"); } while (0)
#endif

#ifdef __cplusplus
} // extern "C"
#endif

#endif // ANDROID_CUTILS_ATOMIC_INLINE_H
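Usage sketch (illustration only, not part of this commit): a writer/reader pair built on android_membar_full(), assuming the translation unit gets the build-provided -DANDROID_SMP value; the publish/consume names and the local #define are made up for the example.

/* Normally -DANDROID_SMP=0/1 comes from the build; defined here only so the sketch is self-contained. */
#define ANDROID_SMP 1
#include <cutils/atomic-inline.h>

static int payload;                /* illustrative data word */
static volatile int flag_ready;    /* illustrative "data is published" flag */

void publish(int value) {
    payload = value;
    android_membar_full();         /* make payload visible before the flag is set */
    flag_ready = 1;
}

int consume(void) {
    while (!flag_ready) { /* spin */ }
    android_membar_full();         /* don't let the payload read move before the flag read */
    return payload;
}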
@@ -25,10 +25,8 @@ extern "C" {
#endif

/*
 * NOTE: memory shared between threads is synchronized by all atomic operations
 * below, this means that no explicit memory barrier is required: all reads or
 * writes issued before android_atomic_* operations are guaranteed to complete
 * before the atomic operation takes place.
 * Unless otherwise noted, the operations below perform a full fence before
 * the atomic operation on SMP systems ("release" semantics).
 */

void android_atomic_write(int32_t value, volatile int32_t* addr);

@@ -37,7 +35,6 @@ void android_atomic_write(int32_t value, volatile int32_t* addr);
 * all these atomic operations return the previous value
 */


int32_t android_atomic_inc(volatile int32_t* addr);
int32_t android_atomic_dec(volatile int32_t* addr);

@@ -48,30 +45,32 @@ int32_t android_atomic_or(int32_t value, volatile int32_t* addr);
int32_t android_atomic_swap(int32_t value, volatile int32_t* addr);

/*
 * NOTE: Two "quasiatomic" operations on the exact same memory address
 * are guaranteed to operate atomically with respect to each other,
 * but no guarantees are made about quasiatomic operations mixed with
 * non-quasiatomic operations on the same address, nor about
 * quasiatomic operations that are performed on partially-overlapping
 * memory.
 * cmpxchg returns zero if the new value was successfully written. This
 * will only happen when *addr == oldvalue.
 *
 * (The return value is inverted from implementations on other platforms, but
 * matches the ARM ldrex/strex semantics. Note also this is a compare-and-set
 * operation, not a compare-and-exchange operation, since we don't return
 * the original value.)
 */

int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr);
int64_t android_quasiatomic_read_64(volatile int64_t* addr);

/*
 * cmpxchg returns a non-zero value if the exchange was NOT performed,
 * in other words if oldvalue != *addr
 */

int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue,
        volatile int32_t* addr);

int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
        volatile int64_t* addr);
/*
 * Same basic operation as android_atomic_cmpxchg, but with "acquire"
 * semantics. The memory barrier, if required, is performed after the
 * new value is stored. Useful for acquiring a spin lock.
 */
int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
        volatile int32_t* addr);

/*
 * Perform an atomic store with "release" semantics. The memory barrier,
 * if required, is performed before the store instruction. Useful for
 * releasing a spin lock.
 */
#define android_atomic_release_store android_atomic_write


#ifdef __cplusplus
} // extern "C"
#endif
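Illustrative sketch of the semantics described in the header above (not part of this commit): a compare-and-set retry loop that relies on the inverted cmpxchg return value, plus a tiny spin lock built from android_atomic_acquire_cmpxchg and android_atomic_release_store; the helper names are made up for the example.

#include <cutils/atomic.h>

/* android_atomic_cmpxchg() returns 0 only when *addr matched the expected
 * old value and the new value was stored; non-zero means retry. */
static void add_clamped(volatile int32_t* addr, int32_t delta, int32_t max) {
    int32_t old, new_val;
    do {
        old = *addr;
        new_val = old + delta;
        if (new_val > max) new_val = max;
    } while (android_atomic_cmpxchg(old, new_val, addr) != 0);
}

/* Minimal spin lock, as the acquire/release comments suggest: 0 = unlocked, 1 = locked. */
static void spin_lock(volatile int32_t* lock) {
    while (android_atomic_acquire_cmpxchg(0, 1, lock) != 0) {
        /* busy-wait; a real lock would yield or back off here */
    }
}

static void spin_unlock(volatile int32_t* lock) {
    android_atomic_release_store(0, lock);
}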
@@ -16,6 +16,13 @@
LOCAL_PATH := $(my-dir)
include $(CLEAR_VARS)

ifeq ($(TARGET_CPU_SMP),true)
    targetSmpFlag := -DANDROID_SMP=1
else
    targetSmpFlag := -DANDROID_SMP=0
endif
hostSmpFlag := -DANDROID_SMP=0

commonSources := \
    array.c \
    hashmap.c \

@@ -80,6 +87,7 @@ LOCAL_MODULE := libcutils
LOCAL_SRC_FILES := $(commonSources) $(commonHostSources)
LOCAL_LDLIBS := -lpthread
LOCAL_STATIC_LIBRARIES := liblog
LOCAL_CFLAGS += $(hostSmpFlag)
include $(BUILD_HOST_STATIC_LIBRARY)


@@ -92,6 +100,7 @@ LOCAL_MODULE := libcutils
LOCAL_SRC_FILES := $(commonSources) $(commonHostSources) memory.c dlmalloc_stubs.c
LOCAL_LDLIBS := -lpthread
LOCAL_SHARED_LIBRARIES := liblog
LOCAL_CFLAGS += $(targetSmpFlag)
include $(BUILD_SHARED_LIBRARY)

else #!sim

@@ -114,12 +123,14 @@ endif # !arm

LOCAL_C_INCLUDES := $(KERNEL_HEADERS)
LOCAL_STATIC_LIBRARIES := liblog
LOCAL_CFLAGS += $(targetSmpFlag)
include $(BUILD_STATIC_LIBRARY)

include $(CLEAR_VARS)
LOCAL_MODULE := libcutils
LOCAL_WHOLE_STATIC_LIBRARIES := libcutils
LOCAL_SHARED_LIBRARIES := liblog
LOCAL_CFLAGS += $(targetSmpFlag)
include $(BUILD_SHARED_LIBRARY)

endif #!sim
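For reference, a small check (illustration only, not part of this commit) of what the -DANDROID_SMP flag added above selects in code that includes the new header: target builds with TARGET_CPU_SMP=true get the real barrier instruction, everything else (including host builds) gets only a compiler reorder barrier.

#include <stdio.h>

int main(void) {
/* ANDROID_SMP is expected to come from the makefile's -D flag; an undefined
 * macro evaluates to 0 here, matching the uniprocessor case. */
#if ANDROID_SMP != 0
    printf("SMP build: android_membar_full() emits a hardware barrier (dmb/mfence)\n");
#else
    printf("UP build: android_membar_full() is only a compiler reorder barrier\n");
#endif
    return 0;
}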
@@ -14,6 +14,8 @@
 * limitations under the License.
 */

/* TODO: insert memory barriers on SMP */

#include <machine/cpu-features.h>

/*

@@ -43,6 +45,8 @@

    .global android_atomic_cmpxchg
    .type android_atomic_cmpxchg, %function
    .global android_atomic_acquire_cmpxchg
    .type android_atomic_acquire_cmpxchg, %function

/*
 * ----------------------------------------------------------------------------

@@ -237,7 +241,7 @@ android_atomic_or:

/* replaced swp instruction with ldrex/strex for ARMv6 & ARMv7 */
android_atomic_swap:
#if defined (_ARM_HAVE_LDREX_STREX)
#if defined (__ARM_HAVE_LDREX_STREX)
1:  ldrex   r2, [r1]
    strex   r3, r0, [r1]
    teq     r3, #0

@@ -256,6 +260,7 @@ android_atomic_swap:
 * output: r0 = 0 (xchg done) or non-zero (xchg not done)
 */

android_atomic_acquire_cmpxchg:
android_atomic_cmpxchg:
    .fnstart
    .save {r4, lr}

@@ -282,10 +287,3 @@ android_atomic_cmpxchg:
    bx lr
    .fnend

/*
 * ----------------------------------------------------------------------------
 * android_atomic_cmpxchg_64
 * input: r0-r1=oldvalue, r2-r3=newvalue, arg4 (on stack)=address
 * output: r0 = 0 (xchg done) or non-zero (xchg not done)
 */
/* TODO: NEED IMPLEMENTATION FOR THIS ARCHITECTURE */
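For readers unfamiliar with ldrex/strex, a rough C equivalent of the android_atomic_swap retry loop above, sketched with a GCC __sync builtin purely as an illustration (the assembly does not use this builtin):

#include <stdint.h>

/* Keep re-reading and retrying the store until no other core intervened,
 * then return the previous value, matching the swap semantics. */
static int32_t swap_sketch(int32_t value, volatile int32_t* addr) {
    int32_t old;
    do {
        old = *addr;                                            /* ldrex r2, [r1]     */
    } while (!__sync_bool_compare_and_swap(addr, old, value));  /* strex + teq + bne  */
    return old;                                                 /* previous value     */
}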
@@ -118,42 +118,8 @@ int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue,
    return result;
}

int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
    int64_t oldValue;
    pthread_mutex_t* lock = SWAP_LOCK(addr);

    pthread_mutex_lock(lock);

    oldValue = *addr;
    *addr = value;

    pthread_mutex_unlock(lock);
    return oldValue;
int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
        volatile int32_t* addr) {
    return android_atomic_cmpxchg(oldvalue, newvalue, addr);
}

int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
        volatile int64_t* addr) {
    int result;
    pthread_mutex_t* lock = SWAP_LOCK(addr);

    pthread_mutex_lock(lock);

    if (*addr == oldvalue) {
        *addr = newvalue;
        result = 0;
    } else {
        result = 1;
    }
    pthread_mutex_unlock(lock);
    return result;
}

int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
    int64_t result;
    pthread_mutex_t* lock = SWAP_LOCK(addr);

    pthread_mutex_lock(lock);
    result = *addr;
    pthread_mutex_unlock(lock);
    return result;
}
@@ -15,6 +15,7 @@
 */

#include <cutils/atomic.h>
#include <cutils/atomic-inline.h>
#ifdef HAVE_WIN32_THREADS
#include <windows.h>
#else

@@ -70,40 +71,19 @@ int32_t android_atomic_swap(int32_t value, volatile int32_t* addr) {
}

int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, volatile int32_t* addr) {
    /* OS X CAS returns zero on failure; invert to return zero on success */
    return OSAtomicCompareAndSwap32Barrier(oldvalue, newvalue, (int32_t*)addr) == 0;
}

#if defined(__ppc__) \
    || defined(__PPC__) \
    || defined(__powerpc__) \
    || defined(__powerpc) \
    || defined(__POWERPC__) \
    || defined(_M_PPC) \
    || defined(__PPC)
#define NEED_QUASIATOMICS 1
#else

int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
        volatile int64_t* addr) {
    return OSAtomicCompareAndSwap64Barrier(oldvalue, newvalue,
            (int64_t*)addr) == 0;
int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
        volatile int32_t* addr) {
    int result = (OSAtomicCompareAndSwap32(oldvalue, newvalue, (int32_t*)addr) == 0);
    if (!result) {
        /* success, perform barrier */
        OSMemoryBarrier();
    }
    return result;
}

int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
    int64_t oldValue;
    do {
        oldValue = *addr;
    } while (android_quasiatomic_cmpxchg_64(oldValue, value, addr));
    return oldValue;
}

int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
    return OSAtomicAdd64Barrier(0, addr);
}

#endif


/*****************************************************************************/
#elif defined(__i386__) || defined(__x86_64__)


@@ -163,6 +143,7 @@ int32_t android_atomic_swap(int32_t value, volatile int32_t* addr) {
}

int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, volatile int32_t* addr) {
    android_membar_full();
    int xchg;
    asm volatile
    (

@@ -175,75 +156,25 @@ int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, volatile int32_t*
    return xchg;
}

#define NEED_QUASIATOMICS 1
int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
        volatile int32_t* addr) {
    int xchg;
    asm volatile
    (
        "   lock; cmpxchg %%ecx, (%%edx);"
        "   setne %%al;"
        "   andl $1, %%eax"
        : "=a" (xchg)
        : "a" (oldvalue), "c" (newvalue), "d" (addr)
    );
    android_membar_full();
    return xchg;
}


/*****************************************************************************/
#elif __arm__
// Most of the implementation is in atomic-android-arm.s.

// on the device, we implement the 64-bit atomic operations through
// mutex locking. normally, this is bad because we must initialize
// a pthread_mutex_t before being able to use it, and this means
// having to do an initialization check on each function call, and
// that's where really ugly things begin...
//
// BUT, as a special twist, we take advantage of the fact that in our
// pthread library, a mutex is simply a volatile word whose value is always
// initialized to 0. In other words, simply declaring a static mutex
// object initializes it!
//
// another twist is that we use a small array of mutexes to dispatch
// the contention locks from different memory addresses
//

#include <pthread.h>

#define SWAP_LOCK_COUNT  32U
static pthread_mutex_t  _swap_locks[SWAP_LOCK_COUNT];

#define SWAP_LOCK(addr) \
    &_swap_locks[((unsigned)(void*)(addr) >> 3U) % SWAP_LOCK_COUNT]


int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
    int64_t oldValue;
    pthread_mutex_t* lock = SWAP_LOCK(addr);

    pthread_mutex_lock(lock);

    oldValue = *addr;
    *addr = value;

    pthread_mutex_unlock(lock);
    return oldValue;
}

int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
        volatile int64_t* addr) {
    int result;
    pthread_mutex_t* lock = SWAP_LOCK(addr);

    pthread_mutex_lock(lock);

    if (*addr == oldvalue) {
        *addr = newvalue;
        result = 0;
    } else {
        result = 1;
    }
    pthread_mutex_unlock(lock);
    return result;
}

int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
    int64_t result;
    pthread_mutex_t* lock = SWAP_LOCK(addr);

    pthread_mutex_lock(lock);
    result = *addr;
    pthread_mutex_unlock(lock);
    return result;
}
// implementation for ARM is in atomic-android-arm.s.

/*****************************************************************************/
#elif __sh__
@@ -255,85 +186,3 @@ int64_t android_quasiatomic_read_64(volatile int64_t* addr) {

#endif



#if NEED_QUASIATOMICS

/* Note that a spinlock is *not* a good idea in general
 * since it can introduce subtle issues. For example,
 * a real-time thread trying to acquire a spinlock already
 * acquired by another thread will never yield, making the
 * CPU loop endlessly!
 *
 * However, this code is only used on the Linux simulator
 * so it's probably ok for us.
 *
 * The alternative is to use a pthread mutex, but
 * these must be initialized before being used, and
 * then you have the problem of lazily initializing
 * a mutex without any other synchronization primitive.
 */

/* global spinlock for all 64-bit quasiatomic operations */
static int32_t quasiatomic_spinlock = 0;

int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
        volatile int64_t* addr) {
    int result;

    while (android_atomic_cmpxchg(0, 1, &quasiatomic_spinlock)) {
#ifdef HAVE_WIN32_THREADS
        Sleep(0);
#else
        sched_yield();
#endif
    }

    if (*addr == oldvalue) {
        *addr = newvalue;
        result = 0;
    } else {
        result = 1;
    }

    android_atomic_swap(0, &quasiatomic_spinlock);

    return result;
}

int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
    int64_t result;

    while (android_atomic_cmpxchg(0, 1, &quasiatomic_spinlock)) {
#ifdef HAVE_WIN32_THREADS
        Sleep(0);
#else
        sched_yield();
#endif
    }

    result = *addr;
    android_atomic_swap(0, &quasiatomic_spinlock);

    return result;
}

int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
    int64_t result;

    while (android_atomic_cmpxchg(0, 1, &quasiatomic_spinlock)) {
#ifdef HAVE_WIN32_THREADS
        Sleep(0);
#else
        sched_yield();
#endif
    }

    result = *addr;
    *addr = value;
    android_atomic_swap(0, &quasiatomic_spinlock);

    return result;
}

#endif