diff --git a/include/cutils/atomic-inline.h b/include/cutils/atomic-inline.h
new file mode 100644
index 000000000..4f5ddf761
--- /dev/null
+++ b/include/cutils/atomic-inline.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2010 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ANDROID_CUTILS_ATOMIC_INLINE_H
+#define ANDROID_CUTILS_ATOMIC_INLINE_H
+
+/*
+ * Inline declarations and macros for some special-purpose atomic
+ * operations.  These are intended for rare circumstances where a
+ * memory barrier needs to be issued inline rather than as a function
+ * call.
+ *
+ * Most code should not use these.
+ *
+ * Anything that does include this file must set ANDROID_SMP to either
+ * 0 or 1, indicating compilation for UP or SMP, respectively.
+ */
+
+#if !defined(ANDROID_SMP)
+# error "Must define ANDROID_SMP before including atomic-inline.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Define the full memory barrier for an SMP system.  This is
+ * platform-specific.
+ */
+
+#ifdef __arm__
+#include <machine/cpu-features.h>
+
+/*
+ * For ARMv6K we need to issue a specific MCR instead of the DMB, since
+ * that wasn't added until v7.  For anything older, SMP isn't relevant.
+ * Since we don't have an ARMv6K to test with, we're not going to deal
+ * with that now.
+ *
+ * The DMB instruction is found in the ARM and Thumb2 instruction sets.
+ * This will fail on plain 16-bit Thumb.
+ */
+#if defined(__ARM_HAVE_DMB)
+# define __android_membar_full_smp() \
+    do { __asm__ __volatile__ ("dmb" ::: "memory"); } while (0)
+#else
+# define __android_membar_full_smp() ARM_SMP_defined_but_no_DMB()
+#endif
+
+#elif defined(__i386__) || defined(__x86_64__)
+/*
+ * For recent x86, we can use the SSE2 mfence instruction.
+ */
+# define __android_membar_full_smp() \
+    do { __asm__ __volatile__ ("mfence" ::: "memory"); } while (0)
+
+#else
+/*
+ * Implementation not defined for this platform.  Hopefully we're building
+ * in uniprocessor mode.
+ */
+# define __android_membar_full_smp() SMP_barrier_not_defined_for_platform()
+#endif
+
+
+/*
+ * Full barrier.  On uniprocessors this is just a compiler reorder barrier,
+ * which ensures that the statements appearing above the barrier in the C/C++
+ * code will be issued before the statements appearing below the barrier.
+ *
+ * For SMP this also includes a memory barrier instruction.  On an ARM
+ * CPU this means that the current core will flush pending writes, wait
+ * for pending reads to complete, and discard any cached reads that could
+ * be stale.  Other CPUs may do less, but the end result is equivalent.
+ */
+#if ANDROID_SMP != 0
+# define android_membar_full() __android_membar_full_smp()
+#else
+# define android_membar_full() \
+    do { __asm__ __volatile__ ("" ::: "memory"); } while (0)
+#endif
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#endif // ANDROID_CUTILS_ATOMIC_INLINE_H
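A minimal sketch of the rare inline use this header describes: hand-publishing a value to another core with an explicit full barrier on both sides. The names payload/data_ready/publish/try_consume are made up for illustration, and ANDROID_SMP would normally come from the build (see the -DANDROID_SMP flags added to libcutils/Android.mk below) rather than being defined by hand.

#include <stdint.h>
#define ANDROID_SMP 1                 /* illustration only; normally -DANDROID_SMP=0/1 from the build */
#include <cutils/atomic-inline.h>

static int32_t payload;               /* hypothetical data handed to another thread */
static volatile int32_t data_ready;   /* hypothetical "data is valid" flag */

void publish(int32_t value)
{
    payload = value;
    android_membar_full();    /* make the payload visible before the flag */
    data_ready = 1;
}

int try_consume(int32_t* out)
{
    if (data_ready == 0)
        return 0;
    android_membar_full();    /* keep the payload read from floating above the flag read */
    *out = payload;
    return 1;
}

On a uniprocessor build the two barriers collapse to compiler reorder barriers, which is all that is needed there.
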
diff --git a/include/cutils/atomic.h b/include/cutils/atomic.h
index 5694d66ac..8e12902b0 100644
--- a/include/cutils/atomic.h
+++ b/include/cutils/atomic.h
@@ -25,10 +25,8 @@ extern "C" {
 #endif

 /*
- * NOTE: memory shared between threads is synchronized by all atomic operations
- * below, this means that no explicit memory barrier is required: all reads or
- * writes issued before android_atomic_* operations are guaranteed to complete
- * before the atomic operation takes place.
+ * Unless otherwise noted, the operations below perform a full fence before
+ * the atomic operation on SMP systems ("release" semantics).
  */

 void android_atomic_write(int32_t value, volatile int32_t* addr);
@@ -37,7 +35,6 @@ void android_atomic_write(int32_t value, volatile int32_t* addr);
  * all these atomic operations return the previous value
  */

-
 int32_t android_atomic_inc(volatile int32_t* addr);
 int32_t android_atomic_dec(volatile int32_t* addr);

@@ -48,30 +45,32 @@ int32_t android_atomic_or(int32_t value, volatile int32_t* addr);
 int32_t android_atomic_swap(int32_t value, volatile int32_t* addr);

 /*
- * NOTE: Two "quasiatomic" operations on the exact same memory address
- * are guaranteed to operate atomically with respect to each other,
- * but no guarantees are made about quasiatomic operations mixed with
- * non-quasiatomic operations on the same address, nor about
- * quasiatomic operations that are performed on partially-overlapping
- * memory.
+ * cmpxchg returns zero if the new value was successfully written.  This
+ * will only happen when *addr == oldvalue.
+ *
+ * (The return value is inverted from implementations on other platforms, but
+ * matches the ARM ldrex/strex semantics.  Note also this is a compare-and-set
+ * operation, not a compare-and-exchange operation, since we don't return
+ * the original value.)
  */
-
-int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr);
-int64_t android_quasiatomic_read_64(volatile int64_t* addr);
-
-/*
- * cmpxchg return a non zero value if the exchange was NOT performed,
- * in other words if oldvalue != *addr
- */
-
 int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue,
         volatile int32_t* addr);
-int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
-        volatile int64_t* addr);

+/*
+ * Same basic operation as android_atomic_cmpxchg, but with "acquire"
+ * semantics.  The memory barrier, if required, is performed after the
+ * new value is stored.  Useful for acquiring a spin lock.
+ */
+int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
+        volatile int32_t* addr);
+/*
+ * Perform an atomic store with "release" semantics.  The memory barrier,
+ * if required, is performed before the store instruction.  Useful for
+ * releasing a spin lock.
+ */
+#define android_atomic_release_store android_atomic_write

-
 #ifdef __cplusplus
 } // extern "C"
 #endif
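The two comments above describe exactly the spin-lock pattern these entry points are meant for. A minimal sketch, assuming a lock word that starts at 0 (the names my_lock/my_spin_lock/my_spin_unlock are illustrative, not part of the header):

#include <stdint.h>
#include <cutils/atomic.h>

static volatile int32_t my_lock = 0;   /* 0 = free, 1 = held */

void my_spin_lock(void)
{
    /* 0 -> 1 transition; returns 0 on success, with the barrier after the store */
    while (android_atomic_acquire_cmpxchg(0, 1, &my_lock) != 0) {
        /* spin; a real implementation would back off or yield here */
    }
}

void my_spin_unlock(void)
{
    /* barrier before the store, then drop the lock word back to 0 */
    android_atomic_release_store(0, &my_lock);
}

The acquire barrier keeps the critical section from starting before the lock is observed as taken, and the release barrier keeps it from leaking past the unlocking store.
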
diff --git a/libcutils/Android.mk b/libcutils/Android.mk
index 4c45cc92e..5b05a1e51 100644
--- a/libcutils/Android.mk
+++ b/libcutils/Android.mk
@@ -16,6 +16,13 @@ LOCAL_PATH := $(my-dir)

 include $(CLEAR_VARS)

+ifeq ($(TARGET_CPU_SMP),true)
+    targetSmpFlag := -DANDROID_SMP=1
+else
+    targetSmpFlag := -DANDROID_SMP=0
+endif
+hostSmpFlag := -DANDROID_SMP=0
+
 commonSources := \
 	array.c \
 	hashmap.c \
@@ -80,6 +87,7 @@ LOCAL_MODULE := libcutils
 LOCAL_SRC_FILES := $(commonSources) $(commonHostSources)
 LOCAL_LDLIBS := -lpthread
 LOCAL_STATIC_LIBRARIES := liblog
+LOCAL_CFLAGS += $(hostSmpFlag)

 include $(BUILD_HOST_STATIC_LIBRARY)

@@ -92,6 +100,7 @@ LOCAL_MODULE := libcutils
 LOCAL_SRC_FILES := $(commonSources) $(commonHostSources) memory.c dlmalloc_stubs.c
 LOCAL_LDLIBS := -lpthread
 LOCAL_SHARED_LIBRARIES := liblog
+LOCAL_CFLAGS += $(targetSmpFlag)

 include $(BUILD_SHARED_LIBRARY)
 else #!sim
@@ -114,12 +123,14 @@ endif # !arm
 LOCAL_C_INCLUDES := $(KERNEL_HEADERS)
 LOCAL_STATIC_LIBRARIES := liblog
+LOCAL_CFLAGS += $(targetSmpFlag)

 include $(BUILD_STATIC_LIBRARY)

 include $(CLEAR_VARS)
 LOCAL_MODULE := libcutils
 LOCAL_WHOLE_STATIC_LIBRARIES := libcutils
 LOCAL_SHARED_LIBRARIES := liblog
+LOCAL_CFLAGS += $(targetSmpFlag)

 include $(BUILD_SHARED_LIBRARY)
 endif #!sim
diff --git a/libcutils/atomic-android-arm.S b/libcutils/atomic-android-arm.S
index 1dd2363d6..f918990c8 100644
--- a/libcutils/atomic-android-arm.S
+++ b/libcutils/atomic-android-arm.S
@@ -14,6 +14,8 @@
  * limitations under the License.
  */

+/* TODO: insert memory barriers on SMP */
+
 #include <machine/cpu-features.h>

 /*
@@ -43,6 +45,8 @@
     .global android_atomic_cmpxchg
     .type android_atomic_cmpxchg, %function
+    .global android_atomic_acquire_cmpxchg
+    .type android_atomic_acquire_cmpxchg, %function

 /*
  * ----------------------------------------------------------------------------
@@ -237,7 +241,7 @@ android_atomic_or:
 /* replaced swp instruction with ldrex/strex for ARMv6 & ARMv7 */

 android_atomic_swap:
-#if defined (_ARM_HAVE_LDREX_STREX)
+#if defined (__ARM_HAVE_LDREX_STREX)
 1:  ldrex r2, [r1]
     strex r3, r0, [r1]
     teq r3, #0
@@ -256,6 +260,7 @@ android_atomic_swap:
  * output: r0 = 0 (xchg done) or non-zero (xchg not done)
  */

+android_atomic_acquire_cmpxchg:
 android_atomic_cmpxchg:
     .fnstart
     .save {r4, lr}
@@ -282,10 +287,3 @@ android_atomic_cmpxchg:
     bx lr
     .fnend

-/*
- * ----------------------------------------------------------------------------
- * android_atomic_cmpxchg_64
- * input: r0-r1=oldvalue, r2-r3=newvalue, arg4 (on stack)=address
- * output: r0 = 0 (xchg done) or non-zero (xchg not done)
- */
-/* TODO: NEED IMPLEMENTATION FOR THIS ARCHITECTURE */
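The TODO above means that, for now, android_atomic_acquire_cmpxchg on ARM is just a second label on android_atomic_cmpxchg with no barrier. A rough C-level sketch of what the acquire variant is expected to do on SMP once the barriers are inserted, mirroring the x86 and OS X variants in libcutils/atomic.c below (illustration only, not the shipped assembly; assumes the translation unit is built with -DANDROID_SMP set):

#include <stdint.h>
#include <cutils/atomic.h>
#include <cutils/atomic-inline.h>

/* Illustration only: "acquire" places the fence after a successful
 * compare-and-set so that later loads/stores cannot move above it. */
int acquire_cmpxchg_sketch(int32_t oldvalue, int32_t newvalue,
        volatile int32_t* addr)
{
    int failed = android_atomic_cmpxchg(oldvalue, newvalue, addr);
    if (!failed)
        android_membar_full();   /* e.g. "dmb" on an SMP ARMv7 build */
    return failed;
}
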
diff --git a/libcutils/atomic-android-sh.c b/libcutils/atomic-android-sh.c
index acbea976d..d95b02bdb 100644
--- a/libcutils/atomic-android-sh.c
+++ b/libcutils/atomic-android-sh.c
@@ -118,42 +118,8 @@ int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue,
     return result;
 }

-int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
-    int64_t oldValue;
-    pthread_mutex_t* lock = SWAP_LOCK(addr);
-
-    pthread_mutex_lock(lock);
-
-    oldValue = *addr;
-    *addr = value;
-
-    pthread_mutex_unlock(lock);
-    return oldValue;
+int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
+        volatile int32_t* addr) {
+    return android_atomic_cmpxchg(oldvalue, newvalue, addr);
 }

-int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
-        volatile int64_t* addr) {
-    int result;
-    pthread_mutex_t* lock = SWAP_LOCK(addr);
-
-    pthread_mutex_lock(lock);
-
-    if (*addr == oldvalue) {
-        *addr = newvalue;
-        result = 0;
-    } else {
-        result = 1;
-    }
-    pthread_mutex_unlock(lock);
-    return result;
-}
-
-int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
-    int64_t result;
-    pthread_mutex_t* lock = SWAP_LOCK(addr);
-
-    pthread_mutex_lock(lock);
-    result = *addr;
-    pthread_mutex_unlock(lock);
-    return result;
-}
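Because android_atomic_cmpxchg returns zero only when the new value was actually written (the inverted convention documented in atomic.h), read-modify-write helpers are written as retry loops around it. A small sketch with an illustrative function name, assuming <cutils/atomic.h>:

#include <stdint.h>
#include <cutils/atomic.h>

/* Atomically add 'amount' to *addr using compare-and-set;
 * returns the value observed immediately before the add. */
int32_t add_via_cmpxchg(int32_t amount, volatile int32_t* addr)
{
    int32_t old;
    do {
        old = *addr;
        /* non-zero return: another thread raced us, so reload and retry */
    } while (android_atomic_cmpxchg(old, old + amount, addr) != 0);
    return old;
}
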
diff --git a/libcutils/atomic.c b/libcutils/atomic.c
index 41faaa282..d81890614 100644
--- a/libcutils/atomic.c
+++ b/libcutils/atomic.c
@@ -15,6 +15,7 @@
  */

 #include <cutils/atomic.h>
+#include <cutils/atomic-inline.h>
 #ifdef HAVE_WIN32_THREADS
 #include <windows.h>
 #else
@@ -70,40 +71,19 @@ int32_t android_atomic_swap(int32_t value, volatile int32_t* addr) {
 }

 int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, volatile int32_t* addr) {
+    /* OS X CAS returns zero on failure; invert to return zero on success */
     return OSAtomicCompareAndSwap32Barrier(oldvalue, newvalue, (int32_t*)addr) == 0;
 }

-#if defined(__ppc__) \
-        || defined(__PPC__) \
-        || defined(__powerpc__) \
-        || defined(__powerpc) \
-        || defined(__POWERPC__) \
-        || defined(_M_PPC) \
-        || defined(__PPC)
-#define NEED_QUASIATOMICS 1
-#else
-
-int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
-        volatile int64_t* addr) {
-    return OSAtomicCompareAndSwap64Barrier(oldvalue, newvalue,
-            (int64_t*)addr) == 0;
+int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
+        volatile int32_t* addr) {
+    int result = (OSAtomicCompareAndSwap32(oldvalue, newvalue, (int32_t*)addr) == 0);
+    if (!result) {
+        /* success, perform barrier */
+        OSMemoryBarrier();
+    }
+    return result;
 }

-int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
-    int64_t oldValue;
-    do {
-        oldValue = *addr;
-    } while (android_quasiatomic_cmpxchg_64(oldValue, value, addr));
-    return oldValue;
-}
-
-int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
-    return OSAtomicAdd64Barrier(0, addr);
-}
-
-#endif
-
-
 /*****************************************************************************/
 #elif defined(__i386__) || defined(__x86_64__)
@@ -163,6 +143,7 @@ int32_t android_atomic_swap(int32_t value, volatile int32_t* addr) {
 }

 int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, volatile int32_t* addr) {
+    android_membar_full();
     int xchg;
     asm volatile
     (
@@ -175,75 +156,25 @@ int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, volatile int32_t*
     return xchg;
 }

-#define NEED_QUASIATOMICS 1
+int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
+        volatile int32_t* addr) {
+    int xchg;
+    asm volatile
+    (
+    "    lock; cmpxchg %%ecx, (%%edx);"
+    "    setne %%al;"
+    "    andl $1, %%eax"
+    : "=a" (xchg)
+    : "a" (oldvalue), "c" (newvalue), "d" (addr)
+    );
+    android_membar_full();
+    return xchg;
+}
+

 /*****************************************************************************/
 #elif __arm__

-// Most of the implementation is in atomic-android-arm.s.
-
-// on the device, we implement the 64-bit atomic operations through
-// mutex locking. normally, this is bad because we must initialize
-// a pthread_mutex_t before being able to use it, and this means
-// having to do an initialization check on each function call, and
-// that's where really ugly things begin...
-//
-// BUT, as a special twist, we take advantage of the fact that in our
-// pthread library, a mutex is simply a volatile word whose value is always
-// initialized to 0. In other words, simply declaring a static mutex
-// object initializes it !
-//
-// another twist is that we use a small array of mutexes to dispatch
-// the contention locks from different memory addresses
-//
-
-#include <pthread.h>
-
-#define SWAP_LOCK_COUNT  32U
-static pthread_mutex_t  _swap_locks[SWAP_LOCK_COUNT];
-
-#define SWAP_LOCK(addr) \
-    &_swap_locks[((unsigned)(void*)(addr) >> 3U) % SWAP_LOCK_COUNT]
-
-
-int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
-    int64_t oldValue;
-    pthread_mutex_t* lock = SWAP_LOCK(addr);
-
-    pthread_mutex_lock(lock);
-
-    oldValue = *addr;
-    *addr = value;
-
-    pthread_mutex_unlock(lock);
-    return oldValue;
-}
-
-int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
-        volatile int64_t* addr) {
-    int result;
-    pthread_mutex_t* lock = SWAP_LOCK(addr);
-
-    pthread_mutex_lock(lock);
-
-    if (*addr == oldvalue) {
-        *addr = newvalue;
-        result = 0;
-    } else {
-        result = 1;
-    }
-    pthread_mutex_unlock(lock);
-    return result;
-}
-
-int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
-    int64_t result;
-    pthread_mutex_t* lock = SWAP_LOCK(addr);
-
-    pthread_mutex_lock(lock);
-    result = *addr;
-    pthread_mutex_unlock(lock);
-    return result;
-}
+// implementation for ARM is in atomic-android-arm.s.

 /*****************************************************************************/
 #elif __sh__
@@ -255,85 +186,3 @@ int64_t android_quasiatomic_read_64(volatile int64_t* addr) {

 #endif

-
-
-#if NEED_QUASIATOMICS
-
-/* Note that a spinlock is *not* a good idea in general
- * since they can introduce subtle issues. For example,
- * a real-time thread trying to acquire a spinlock already
- * acquired by another thread will never yeld, making the
- * CPU loop endlessly!
- *
- * However, this code is only used on the Linux simulator
- * so it's probably ok for us.
- *
- * The alternative is to use a pthread mutex, but
- * these must be initialized before being used, and
- * then you have the problem of lazily initializing
- * a mutex without any other synchronization primitive.
- */
-
-/* global spinlock for all 64-bit quasiatomic operations */
-static int32_t quasiatomic_spinlock = 0;
-
-int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
-        volatile int64_t* addr) {
-    int result;
-
-    while (android_atomic_cmpxchg(0, 1, &quasiatomic_spinlock)) {
-#ifdef HAVE_WIN32_THREADS
-        Sleep(0);
-#else
-        sched_yield();
-#endif
-    }
-
-    if (*addr == oldvalue) {
-        *addr = newvalue;
-        result = 0;
-    } else {
-        result = 1;
-    }
-
-    android_atomic_swap(0, &quasiatomic_spinlock);
-
-    return result;
-}
-
-int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
-    int64_t result;
-
-    while (android_atomic_cmpxchg(0, 1, &quasiatomic_spinlock)) {
-#ifdef HAVE_WIN32_THREADS
-        Sleep(0);
-#else
-        sched_yield();
-#endif
-    }
-
-    result = *addr;
-    android_atomic_swap(0, &quasiatomic_spinlock);
-
-    return result;
-}
-
-int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
-    int64_t result;
-
-    while (android_atomic_cmpxchg(0, 1, &quasiatomic_spinlock)) {
-#ifdef HAVE_WIN32_THREADS
-        Sleep(0);
-#else
-        sched_yield();
-#endif
-    }
-
-    result = *addr;
-    *addr = value;
-    android_atomic_swap(0, &quasiatomic_spinlock);
-
-    return result;
-}
-
-#endif
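One closing note on the alias added to atomic.h: android_atomic_release_store works because android_atomic_write issues its fence before the store, as the new header comment states. Roughly what that amounts to on an SMP build (illustration only; the real store is the per-architecture code, and this sketch assumes <cutils/atomic-inline.h> is usable, i.e. ANDROID_SMP is set by the build):

#include <stdint.h>
#include <cutils/atomic-inline.h>

static inline void release_store_sketch(int32_t value, volatile int32_t* addr)
{
    android_membar_full();   /* earlier writes become visible first... */
    *addr = value;           /* ...then the lock word or flag itself is stored */
}

On a uniprocessor build (ANDROID_SMP=0) the barrier degenerates to a compiler reorder barrier, so this reduces to a plain store that the compiler may not hoist earlier stores past.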