am ac322da6: Atomic/SMP update.

Andy McFadden 2010-05-21 12:08:03 -07:00 committed by Android Git Automerger
commit 0bc248f983
6 changed files with 169 additions and 245 deletions

View File

@ -0,0 +1,101 @@
/*
* Copyright (C) 2010 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ANDROID_CUTILS_ATOMIC_INLINE_H
#define ANDROID_CUTILS_ATOMIC_INLINE_H
/*
* Inline declarations and macros for some special-purpose atomic
* operations. These are intended for rare circumstances where a
* memory barrier needs to be issued inline rather than as a function
* call.
*
* Most code should not use these.
*
* Anything that does include this file must set ANDROID_SMP to either
* 0 or 1, indicating compilation for UP or SMP, respectively.
*/
#if !defined(ANDROID_SMP)
# error "Must define ANDROID_SMP before including atomic-inline.h"
#endif
#ifdef __cplusplus
extern "C" {
#endif
/*
* Define the full memory barrier for an SMP system. This is
* platform-specific.
*/
#ifdef __arm__
#include <machine/cpu-features.h>
/*
* For ARMv6K we need to issue a specific MCR instead of the DMB, since
* that wasn't added until v7. For anything older, SMP isn't relevant.
* Since we don't have an ARMv6K to test with, we're not going to deal
* with that now.
*
* The DMB instruction is found in the ARM and Thumb2 instruction sets.
* This will fail on plain 16-bit Thumb.
*/
#if defined(__ARM_HAVE_DMB)
# define __android_membar_full_smp() \
do { __asm__ __volatile__ ("dmb" ::: "memory"); } while (0)
#else
# define __android_membar_full_smp() ARM_SMP_defined_but_no_DMB()
#endif
#elif defined(__i386__) || defined(__x86_64__)
/*
* For recent x86, we can use the SSE2 mfence instruction.
*/
# define __android_membar_full_smp() \
do { __asm__ __volatile__ ("mfence" ::: "memory"); } while (0)
#else
/*
* Implementation not defined for this platform. Hopefully we're building
* in uniprocessor mode.
*/
# define __android_membar_full_smp() SMP_barrier_not_defined_for_platform()
#endif
/*
* Full barrier. On uniprocessors this is just a compiler reorder barrier,
* which ensures that the statements appearing above the barrier in the C/C++
* code will be issued before the statements appearing below the barrier.
*
* For SMP this also includes a memory barrier instruction. On an ARM
* CPU this means that the current core will flush pending writes, wait
* for pending reads to complete, and discard any cached reads that could
* be stale. Other CPUs may do less, but the end result is equivalent.
*/
#if ANDROID_SMP != 0
# define android_membar_full() __android_membar_full_smp()
#else
# define android_membar_full() \
do { __asm__ __volatile__ ("" ::: "memory"); } while (0)
#endif
#ifdef __cplusplus
} // extern "C"
#endif
#endif // ANDROID_CUTILS_ATOMIC_INLINE_H
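
For context, a minimal usage sketch of the new header (not part of the commit): a file built with ANDROID_SMP defined, normally supplied by the build as -DANDROID_SMP=1 or =0, that uses android_membar_full() to publish a value to another core. The names publish_value, shared_data and data_ready are hypothetical.

#define ANDROID_SMP 1              /* assumption: normally -DANDROID_SMP=1/0 from the build */
#include <cutils/atomic-inline.h>

static int shared_data;
static volatile int data_ready;

void publish_value(int v)
{
    shared_data = v;               /* plain store */
    android_membar_full();         /* make the store visible before raising the flag */
    data_ready = 1;                /* a reader polls this flag and issues its own barrier */
}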

View File

@ -25,10 +25,8 @@ extern "C" {
#endif
/*
* NOTE: memory shared between threads is synchronized by all atomic operations
* below; this means that no explicit memory barrier is required: all reads or
* writes issued before android_atomic_* operations are guaranteed to complete
* before the atomic operation takes place.
* Unless otherwise noted, the operations below perform a full fence before
* the atomic operation on SMP systems ("release" semantics).
*/
void android_atomic_write(int32_t value, volatile int32_t* addr);
@ -37,7 +35,6 @@ void android_atomic_write(int32_t value, volatile int32_t* addr);
* all these atomic operations return the previous value
*/
int32_t android_atomic_inc(volatile int32_t* addr);
int32_t android_atomic_dec(volatile int32_t* addr);
@ -48,30 +45,32 @@ int32_t android_atomic_or(int32_t value, volatile int32_t* addr);
int32_t android_atomic_swap(int32_t value, volatile int32_t* addr);
/*
* NOTE: Two "quasiatomic" operations on the exact same memory address
* are guaranteed to operate atomically with respect to each other,
* but no guarantees are made about quasiatomic operations mixed with
* non-quasiatomic operations on the same address, nor about
* quasiatomic operations that are performed on partially-overlapping
* memory.
* cmpxchg returns zero if the new value was successfully written. This
* will only happen when *addr == oldvalue.
*
* (The return value is inverted from implementations on other platforms, but
* matches the ARM ldrex/strex semantics. Note also this is a compare-and-set
* operation, not a compare-and-exchange operation, since we don't return
* the original value.)
*/
int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr);
int64_t android_quasiatomic_read_64(volatile int64_t* addr);
/*
* cmpxchg returns a non-zero value if the exchange was NOT performed,
* in other words when oldvalue != *addr
*/
int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue,
volatile int32_t* addr);
int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
volatile int64_t* addr);
/*
* Same basic operation as android_atomic_cmpxchg, but with "acquire"
* semantics. The memory barrier, if required, is performed after the
* new value is stored. Useful for acquiring a spin lock.
*/
int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
volatile int32_t* addr);
/*
* Perform an atomic store with "release" semantics. The memory barrier,
* if required, is performed before the store instruction. Useful for
* releasing a spin lock.
*/
#define android_atomic_release_store android_atomic_write
#ifdef __cplusplus
} // extern "C"
#endif
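
The acquire/release comments above describe the intended spin-lock pattern; here is a minimal sketch built only on the declarations in this header. The spin_lock_t, spin_lock and spin_unlock names are illustrative, not part of libcutils.

#include <cutils/atomic.h>

typedef volatile int32_t spin_lock_t;    /* 0 = free, 1 = held */

static void spin_lock(spin_lock_t* lock)
{
    /* cmpxchg returns 0 on success (the inverted convention noted above), so
     * spin while it reports failure; the "acquire" barrier keeps accesses in
     * the critical section from floating above the lock acquisition. */
    while (android_atomic_acquire_cmpxchg(0, 1, lock) != 0) {
        /* spin */
    }
}

static void spin_unlock(spin_lock_t* lock)
{
    /* "release" store: the barrier, if any, is issued before the store, so
     * writes made in the critical section are visible before the lock is
     * seen as free. */
    android_atomic_release_store(0, lock);
}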

View File

@ -16,6 +16,13 @@
LOCAL_PATH := $(my-dir)
include $(CLEAR_VARS)
ifeq ($(TARGET_CPU_SMP),true)
targetSmpFlag := -DANDROID_SMP=1
else
targetSmpFlag := -DANDROID_SMP=0
endif
hostSmpFlag := -DANDROID_SMP=0
commonSources := \
array.c \
hashmap.c \
@ -80,6 +87,7 @@ LOCAL_MODULE := libcutils
LOCAL_SRC_FILES := $(commonSources) $(commonHostSources)
LOCAL_LDLIBS := -lpthread
LOCAL_STATIC_LIBRARIES := liblog
LOCAL_CFLAGS += $(hostSmpFlag)
include $(BUILD_HOST_STATIC_LIBRARY)
@ -92,6 +100,7 @@ LOCAL_MODULE := libcutils
LOCAL_SRC_FILES := $(commonSources) $(commonHostSources) memory.c dlmalloc_stubs.c
LOCAL_LDLIBS := -lpthread
LOCAL_SHARED_LIBRARIES := liblog
LOCAL_CFLAGS += $(targetSmpFlag)
include $(BUILD_SHARED_LIBRARY)
else #!sim
@ -114,12 +123,14 @@ endif # !arm
LOCAL_C_INCLUDES := $(KERNEL_HEADERS)
LOCAL_STATIC_LIBRARIES := liblog
LOCAL_CFLAGS += $(targetSmpFlag)
include $(BUILD_STATIC_LIBRARY)
include $(CLEAR_VARS)
LOCAL_MODULE := libcutils
LOCAL_WHOLE_STATIC_LIBRARIES := libcutils
LOCAL_SHARED_LIBRARIES := liblog
LOCAL_CFLAGS += $(targetSmpFlag)
include $(BUILD_SHARED_LIBRARY)
endif #!sim

View File

@ -14,6 +14,8 @@
* limitations under the License.
*/
/* TODO: insert memory barriers on SMP */
#include <machine/cpu-features.h>
/*
@ -43,6 +45,8 @@
.global android_atomic_cmpxchg
.type android_atomic_cmpxchg, %function
.global android_atomic_acquire_cmpxchg
.type android_atomic_acquire_cmpxchg, %function
/*
* ----------------------------------------------------------------------------
@ -237,7 +241,7 @@ android_atomic_or:
/* replaced swp instruction with ldrex/strex for ARMv6 & ARMv7 */
android_atomic_swap:
#if defined (_ARM_HAVE_LDREX_STREX)
#if defined (__ARM_HAVE_LDREX_STREX)
1: ldrex r2, [r1]
strex r3, r0, [r1]
teq r3, #0
@ -256,6 +260,7 @@ android_atomic_swap:
* output: r0 = 0 (xchg done) or non-zero (xchg not done)
*/
android_atomic_acquire_cmpxchg:
android_atomic_cmpxchg:
.fnstart
.save {r4, lr}
@ -282,10 +287,3 @@ android_atomic_cmpxchg:
bx lr
.fnend
/*
* ----------------------------------------------------------------------------
* android_atomic_cmpxchg_64
* input: r0-r1=oldvalue, r2-r3=newvalue, arg4 (on stack)=address
* output: r0 = 0 (xchg done) or non-zero (xchg not done)
*/
/* TODO: NEED IMPLEMENTATION FOR THIS ARCHITECTURE */
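
For readers following the assembly: the two labels above now alias the same ldrex/strex compare-and-set loop. Below is a C-level rendering of what that loop computes, written as GCC inline asm and assuming ARM mode on a core with __ARM_HAVE_LDREX_STREX; it is an illustration, not the .S source, and like the assembly it omits the SMP barriers flagged in the TODO.

#include <stdint.h>

static int cas32_sketch(int32_t oldvalue, int32_t newvalue, volatile int32_t* addr)
{
    int32_t prev;
    int status;
    do {
        __asm__ __volatile__(
            "ldrex %0, [%3]\n"        /* prev = *addr, open the exclusive monitor */
            "mov %1, #1\n"            /* assume the store will not happen */
            "teq %0, %2\n"            /* does prev match oldvalue? */
            "strexeq %1, %4, [%3]\n"  /* if so, try to store newvalue; 0 = stored */
            : "=&r" (prev), "=&r" (status)
            : "r" (oldvalue), "r" (addr), "r" (newvalue)
            : "cc", "memory");
    } while (status != 0 && prev == oldvalue);   /* retry only if the strex lost the race */
    return prev != oldvalue;                     /* r0 = 0 (xchg done) or non-zero (not done) */
}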

View File

@ -118,42 +118,8 @@ int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue,
return result;
}
int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
int64_t oldValue;
pthread_mutex_t* lock = SWAP_LOCK(addr);
pthread_mutex_lock(lock);
oldValue = *addr;
*addr = value;
pthread_mutex_unlock(lock);
return oldValue;
int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
volatile int32_t* addr) {
return android_atomic_cmpxchg(oldvalue, newvalue, addr);
}
int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
volatile int64_t* addr) {
int result;
pthread_mutex_t* lock = SWAP_LOCK(addr);
pthread_mutex_lock(lock);
if (*addr == oldvalue) {
*addr = newvalue;
result = 0;
} else {
result = 1;
}
pthread_mutex_unlock(lock);
return result;
}
int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
int64_t result;
pthread_mutex_t* lock = SWAP_LOCK(addr);
pthread_mutex_lock(lock);
result = *addr;
pthread_mutex_unlock(lock);
return result;
}

View File

@ -15,6 +15,7 @@
*/
#include <cutils/atomic.h>
#include <cutils/atomic-inline.h>
#ifdef HAVE_WIN32_THREADS
#include <windows.h>
#else
@ -70,40 +71,19 @@ int32_t android_atomic_swap(int32_t value, volatile int32_t* addr) {
}
int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, volatile int32_t* addr) {
/* OS X CAS returns zero on failure; invert to return zero on success */
return OSAtomicCompareAndSwap32Barrier(oldvalue, newvalue, (int32_t*)addr) == 0;
}
#if defined(__ppc__) \
|| defined(__PPC__) \
|| defined(__powerpc__) \
|| defined(__powerpc) \
|| defined(__POWERPC__) \
|| defined(_M_PPC) \
|| defined(__PPC)
#define NEED_QUASIATOMICS 1
#else
int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
volatile int64_t* addr) {
return OSAtomicCompareAndSwap64Barrier(oldvalue, newvalue,
(int64_t*)addr) == 0;
int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
volatile int32_t* addr) {
int result = (OSAtomicCompareAndSwap32(oldvalue, newvalue, (int32_t*)addr) == 0);
if (!result) {
/* success, perform barrier */
OSMemoryBarrier();
}
return result;
}
int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
int64_t oldValue;
do {
oldValue = *addr;
} while (android_quasiatomic_cmpxchg_64(oldValue, value, addr));
return oldValue;
}
int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
return OSAtomicAdd64Barrier(0, addr);
}
#endif
/*****************************************************************************/
#elif defined(__i386__) || defined(__x86_64__)
@ -163,6 +143,7 @@ int32_t android_atomic_swap(int32_t value, volatile int32_t* addr) {
}
int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, volatile int32_t* addr) {
android_membar_full();
int xchg;
asm volatile
(
@ -175,75 +156,25 @@ int android_atomic_cmpxchg(int32_t oldvalue, int32_t newvalue, volatile int32_t*
return xchg;
}
#define NEED_QUASIATOMICS 1
int android_atomic_acquire_cmpxchg(int32_t oldvalue, int32_t newvalue,
volatile int32_t* addr) {
int xchg;
asm volatile
(
" lock; cmpxchg %%ecx, (%%edx);"
" setne %%al;"
" andl $1, %%eax"
: "=a" (xchg)
: "a" (oldvalue), "c" (newvalue), "d" (addr)
);
android_membar_full();
return xchg;
}
/*****************************************************************************/
#elif __arm__
// Most of the implementation is in atomic-android-arm.s.
// on the device, we implement the 64-bit atomic operations through
// mutex locking. normally, this is bad because we must initialize
// a pthread_mutex_t before being able to use it, and this means
// having to do an initialization check on each function call, and
// that's where really ugly things begin...
//
// BUT, as a special twist, we take advantage of the fact that in our
// pthread library, a mutex is simply a volatile word whose value is always
// initialized to 0. In other words, simply declaring a static mutex
// object initializes it!
//
// another twist is that we use a small array of mutexes to dispatch
// the contention locks from different memory addresses
//
#include <pthread.h>
#define SWAP_LOCK_COUNT 32U
static pthread_mutex_t _swap_locks[SWAP_LOCK_COUNT];
#define SWAP_LOCK(addr) \
&_swap_locks[((unsigned)(void*)(addr) >> 3U) % SWAP_LOCK_COUNT]
int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
int64_t oldValue;
pthread_mutex_t* lock = SWAP_LOCK(addr);
pthread_mutex_lock(lock);
oldValue = *addr;
*addr = value;
pthread_mutex_unlock(lock);
return oldValue;
}
int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
volatile int64_t* addr) {
int result;
pthread_mutex_t* lock = SWAP_LOCK(addr);
pthread_mutex_lock(lock);
if (*addr == oldvalue) {
*addr = newvalue;
result = 0;
} else {
result = 1;
}
pthread_mutex_unlock(lock);
return result;
}
int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
int64_t result;
pthread_mutex_t* lock = SWAP_LOCK(addr);
pthread_mutex_lock(lock);
result = *addr;
pthread_mutex_unlock(lock);
return result;
}
// implementation for ARM is in atomic-android-arm.s.
/*****************************************************************************/
#elif __sh__
@ -255,85 +186,3 @@ int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
#endif
#if NEED_QUASIATOMICS
/* Note that a spinlock is *not* a good idea in general
* since they can introduce subtle issues. For example,
* a real-time thread trying to acquire a spinlock already
* acquired by another thread will never yield, making the
* CPU loop endlessly!
*
* However, this code is only used on the Linux simulator
* so it's probably ok for us.
*
* The alternative is to use a pthread mutex, but
* these must be initialized before being used, and
* then you have the problem of lazily initializing
* a mutex without any other synchronization primitive.
*/
/* global spinlock for all 64-bit quasiatomic operations */
static int32_t quasiatomic_spinlock = 0;
int android_quasiatomic_cmpxchg_64(int64_t oldvalue, int64_t newvalue,
volatile int64_t* addr) {
int result;
while (android_atomic_cmpxchg(0, 1, &quasiatomic_spinlock)) {
#ifdef HAVE_WIN32_THREADS
Sleep(0);
#else
sched_yield();
#endif
}
if (*addr == oldvalue) {
*addr = newvalue;
result = 0;
} else {
result = 1;
}
android_atomic_swap(0, &quasiatomic_spinlock);
return result;
}
int64_t android_quasiatomic_read_64(volatile int64_t* addr) {
int64_t result;
while (android_atomic_cmpxchg(0, 1, &quasiatomic_spinlock)) {
#ifdef HAVE_WIN32_THREADS
Sleep(0);
#else
sched_yield();
#endif
}
result = *addr;
android_atomic_swap(0, &quasiatomic_spinlock);
return result;
}
int64_t android_quasiatomic_swap_64(int64_t value, volatile int64_t* addr) {
int64_t result;
while (android_atomic_cmpxchg(0, 1, &quasiatomic_spinlock)) {
#ifdef HAVE_WIN32_THREADS
Sleep(0);
#else
sched_yield();
#endif
}
result = *addr;
*addr = value;
android_atomic_swap(0, &quasiatomic_spinlock);
return result;
}
#endif