Enabling a reduced config of HS38 (w/o div-rem, ll64...)

Adding software workaround for LLOCK/SCOND livelock
 Fallout of a recent pt_regs update
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1
 
 iQIcBAABAgAGBQJVxG0RAAoJEGnX8d3iisJe0icP/1LHI7JqO9Th52cob6aYfcC/
 Xi5LLQTTEz/68cvXHxYZo/wfaj+lZQL7YlugIHhnJfbbyXrnimoP+ljZY4VGnEfl
 F1g7dtltxVMZ7eB4/o9QsfT6ChwC5QvuHkKh+tTo8ZJ3YrU+jRwfQtPWrL/+cRnH
 U1U0dgmoU4gw5sP9+pPqcCS9qsw6ODjvFXP5QdbegTEqbLrkG77aqEaHD0WoUfKJ
 dgmluwYYRonMc1LTapNH7JeoNff5GPNDawItviuZ4tO/fPelNsujN5CYnro6thtp
 EKY1LHWicohVOJJ9PBYHb+4Th/taMYCYHWt9y/nkI860Jj9meq3PDM53iNvSJ71p
 9pU8Mung7jcN6HLCpdUB3jEQKAkX/a2hOqvWqqYuSbzBUq29HdsJUFMleCnQO4Mj
 E4eJfe0keSMKk3WOS3tlwp2KDHN1JKMfYaqBJtbYSnlhI/cHAsF9yLOjLV5adc+b
 vHglS5x7H4YxqPbiUPlDZv2Q7OD4P32wRpCTxOYynCtWZalSQ++2e2enVL3oCy7l
 e0fT04dYndhy7vfI+YCpu4dRUBRin65klrP11damhlJgF3u7RudoJC8s71SMJqs4
 +TQsuhCz5oQZty8+mPbSilCa3FZNSSlWe4tHk/F5xQ/BASXtq5m8lb9Z1SllUbKu
 +d0xcKVIDFaDTU9keHm6
 =gFBX
 -----END PGP SIGNATURE-----

Merge tag 'arc-v4.2-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc

Pull ARC fixes from Vineet Gupta:
 "Here's a late pull request for accumulated ARC fixes which came out of
  extended testing of the new ARCv2 port with LTP etc.  llock/scond
  livelock workaround has been reviewed by PeterZ.  The changes look a
  lot but I've crafted them into finer grained patches for better
  tracking later.

  I have some more fixes (ARC Futex backend) ready to go but those will
  have to wait for tglx to return from vacation.

  Summary:
   - Enable a reduced config of HS38 (w/o div-rem, ll64...)
   - Add software workaround for LLOCK/SCOND livelock
   - Fallout of a recent pt_regs update"

* tag 'arc-v4.2-rc6-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vgupta/arc:
  ARCv2: spinlock/rwlock/atomics: reduce 1 instruction in exponential backoff
  ARC: Make pt_regs regs unsigned
  ARCv2: spinlock/rwlock: Reset retry delay when starting a new spin-wait cycle
  ARCv2: spinlock/rwlock/atomics: Delayed retry of failed SCOND with exponential backoff
  ARC: LLOCK/SCOND based rwlock
  ARC: LLOCK/SCOND based spin_lock
  ARC: refactor atomic inline asm operands with symbolic names
  Revert "ARCv2: STAR 9000837815 workaround hardware exclusive transactions livelock"
  ARCv2: [axs103_smp] Reduce clk for Quad FPGA configs
  ARCv2: Fix the peripheral address space detection
  ARCv2: allow selection of page size for MMUv4
  ARCv2: lib: memset: Don't assume 64-bit load/stores
  ARCv2: lib: memcpy: Missing PREFETCHW
  ARCv2: add knob for DIV_REV in Kconfig
  ARC/time: Migrate to new 'set-state' interface
This commit is contained in:
Linus Torvalds 2015-08-08 04:38:00 +03:00
commit dd2384a75d
13 changed files with 718 additions and 116 deletions

View File

@ -313,11 +313,11 @@ config ARC_PAGE_SIZE_8K
config ARC_PAGE_SIZE_16K
bool "16KB"
depends on ARC_MMU_V3
depends on ARC_MMU_V3 || ARC_MMU_V4
config ARC_PAGE_SIZE_4K
bool "4KB"
depends on ARC_MMU_V3
depends on ARC_MMU_V3 || ARC_MMU_V4
endchoice
@ -365,6 +365,11 @@ config ARC_HAS_LLSC
default y
depends on !ARC_CANT_LLSC
config ARC_STAR_9000923308
bool "Workaround for llock/scond livelock"
default y
depends on ISA_ARCV2 && SMP && ARC_HAS_LLSC
config ARC_HAS_SWAPE
bool "Insn: SWAPE (endian-swap)"
default y
@ -379,6 +384,10 @@ config ARC_HAS_LL64
dest operands with 2 possible source operands.
default y
config ARC_HAS_DIV_REM
bool "Insn: div, divu, rem, remu"
default y
config ARC_HAS_RTC
bool "Local 64-bit r/o cycle counter"
default n

View File

@ -36,8 +36,16 @@ cflags-$(atleast_gcc44) += -fsection-anchors
cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock
cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape
ifdef CONFIG_ISA_ARCV2
ifndef CONFIG_ARC_HAS_LL64
cflags-$(CONFIG_ISA_ARCV2) += -mno-ll64
cflags-y += -mno-ll64
endif
ifndef CONFIG_ARC_HAS_DIV_REM
cflags-y += -mno-div-rem
endif
endif
cflags-$(CONFIG_ARC_DW2_UNWIND) += -fasynchronous-unwind-tables

View File

@ -89,11 +89,10 @@
#define ECR_C_BIT_DTLB_LD_MISS 8
#define ECR_C_BIT_DTLB_ST_MISS 9
/* Auxiliary registers */
#define AUX_IDENTITY 4
#define AUX_INTR_VEC_BASE 0x25
#define AUX_NON_VOL 0x5e
/*
* Floating Pt Registers
@ -240,9 +239,9 @@ struct bcr_extn_xymem {
struct bcr_perip {
#ifdef CONFIG_CPU_BIG_ENDIAN
unsigned int start:8, pad2:8, sz:8, pad:8;
unsigned int start:8, pad2:8, sz:8, ver:8;
#else
unsigned int pad:8, sz:8, pad2:8, start:8;
unsigned int ver:8, sz:8, pad2:8, start:8;
#endif
};

View File

@ -23,33 +23,60 @@
#define atomic_set(v, i) (((v)->counter) = (i))
#ifdef CONFIG_ISA_ARCV2
#define PREFETCHW " prefetchw [%1] \n"
#else
#define PREFETCHW
#ifdef CONFIG_ARC_STAR_9000923308
#define SCOND_FAIL_RETRY_VAR_DEF \
unsigned int delay = 1, tmp; \
#define SCOND_FAIL_RETRY_ASM \
" bz 4f \n" \
" ; --- scond fail delay --- \n" \
" mov %[tmp], %[delay] \n" /* tmp = delay */ \
"2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \
" sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \
" rol %[delay], %[delay] \n" /* delay *= 2 */ \
" b 1b \n" /* start over */ \
"4: ; --- success --- \n" \
#define SCOND_FAIL_RETRY_VARS \
,[delay] "+&r" (delay),[tmp] "=&r" (tmp) \
#else /* !CONFIG_ARC_STAR_9000923308 */
#define SCOND_FAIL_RETRY_VAR_DEF
#define SCOND_FAIL_RETRY_ASM \
" bnz 1b \n" \
#define SCOND_FAIL_RETRY_VARS
#endif
#define ATOMIC_OP(op, c_op, asm_op) \
static inline void atomic_##op(int i, atomic_t *v) \
{ \
unsigned int temp; \
unsigned int val; \
SCOND_FAIL_RETRY_VAR_DEF \
\
__asm__ __volatile__( \
"1: \n" \
PREFETCHW \
" llock %0, [%1] \n" \
" " #asm_op " %0, %0, %2 \n" \
" scond %0, [%1] \n" \
" bnz 1b \n" \
: "=&r"(temp) /* Early clobber, to prevent reg reuse */ \
: "r"(&v->counter), "ir"(i) \
"1: llock %[val], [%[ctr]] \n" \
" " #asm_op " %[val], %[val], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" \n" \
SCOND_FAIL_RETRY_ASM \
\
: [val] "=&r" (val) /* Early clobber to prevent reg reuse */ \
SCOND_FAIL_RETRY_VARS \
: [ctr] "r" (&v->counter), /* Not "m": llock only supports reg direct addr mode */ \
[i] "ir" (i) \
: "cc"); \
} \
#define ATOMIC_OP_RETURN(op, c_op, asm_op) \
static inline int atomic_##op##_return(int i, atomic_t *v) \
{ \
unsigned int temp; \
unsigned int val; \
SCOND_FAIL_RETRY_VAR_DEF \
\
/* \
* Explicit full memory barrier needed before/after as \
@ -58,19 +85,21 @@ static inline int atomic_##op##_return(int i, atomic_t *v) \
smp_mb(); \
\
__asm__ __volatile__( \
"1: \n" \
PREFETCHW \
" llock %0, [%1] \n" \
" " #asm_op " %0, %0, %2 \n" \
" scond %0, [%1] \n" \
" bnz 1b \n" \
: "=&r"(temp) \
: "r"(&v->counter), "ir"(i) \
"1: llock %[val], [%[ctr]] \n" \
" " #asm_op " %[val], %[val], %[i] \n" \
" scond %[val], [%[ctr]] \n" \
" \n" \
SCOND_FAIL_RETRY_ASM \
\
: [val] "=&r" (val) \
SCOND_FAIL_RETRY_VARS \
: [ctr] "r" (&v->counter), \
[i] "ir" (i) \
: "cc"); \
\
smp_mb(); \
\
return temp; \
return val; \
}
#else /* !CONFIG_ARC_HAS_LLSC */
@ -150,6 +179,9 @@ ATOMIC_OP(and, &=, and)
#undef ATOMIC_OPS
#undef ATOMIC_OP_RETURN
#undef ATOMIC_OP
#undef SCOND_FAIL_RETRY_VAR_DEF
#undef SCOND_FAIL_RETRY_ASM
#undef SCOND_FAIL_RETRY_VARS
/**
* __atomic_add_unless - add unless the number is a given value

View File

@ -20,20 +20,20 @@
struct pt_regs {
/* Real registers */
long bta; /* bta_l1, bta_l2, erbta */
unsigned long bta; /* bta_l1, bta_l2, erbta */
long lp_start, lp_end, lp_count;
unsigned long lp_start, lp_end, lp_count;
long status32; /* status32_l1, status32_l2, erstatus */
long ret; /* ilink1, ilink2 or eret */
long blink;
long fp;
long r26; /* gp */
unsigned long status32; /* status32_l1, status32_l2, erstatus */
unsigned long ret; /* ilink1, ilink2 or eret */
unsigned long blink;
unsigned long fp;
unsigned long r26; /* gp */
long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
unsigned long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
long sp; /* user/kernel sp depending on where we came from */
long orig_r0;
unsigned long sp; /* User/Kernel depending on where we came from */
unsigned long orig_r0;
/*
* To distinguish bet excp, syscall, irq
@ -55,13 +55,13 @@ struct pt_regs {
unsigned long event;
};
long user_r25;
unsigned long user_r25;
};
#else
struct pt_regs {
long orig_r0;
unsigned long orig_r0;
union {
struct {
@ -76,26 +76,26 @@ struct pt_regs {
unsigned long event;
};
long bta; /* bta_l1, bta_l2, erbta */
unsigned long bta; /* bta_l1, bta_l2, erbta */
long user_r25;
unsigned long user_r25;
long r26; /* gp */
long fp;
long sp; /* user/kernel sp depending on where we came from */
unsigned long r26; /* gp */
unsigned long fp;
unsigned long sp; /* user/kernel sp depending on where we came from */
long r12;
unsigned long r12;
/*------- Below list auto saved by h/w -----------*/
long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
unsigned long r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11;
long blink;
long lp_end, lp_start, lp_count;
unsigned long blink;
unsigned long lp_end, lp_start, lp_count;
long ei, ldi, jli;
unsigned long ei, ldi, jli;
long ret;
long status32;
unsigned long ret;
unsigned long status32;
};
#endif
@ -103,10 +103,10 @@ struct pt_regs {
/* Callee saved registers - need to be saved only when you are scheduled out */
struct callee_regs {
long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
unsigned long r25, r24, r23, r22, r21, r20, r19, r18, r17, r16, r15, r14, r13;
};
#define instruction_pointer(regs) (unsigned long)((regs)->ret)
#define instruction_pointer(regs) ((regs)->ret)
#define profile_pc(regs) instruction_pointer(regs)
/* return 1 if user mode or 0 if kernel mode */
@ -142,7 +142,7 @@ struct callee_regs {
static inline long regs_return_value(struct pt_regs *regs)
{
return regs->r0;
return (long)regs->r0;
}
#endif /* !__ASSEMBLY__ */

View File

@ -18,9 +18,518 @@
#define arch_spin_unlock_wait(x) \
do { while (arch_spin_is_locked(x)) cpu_relax(); } while (0)
#ifdef CONFIG_ARC_HAS_LLSC
/*
* A normal LLOCK/SCOND based system, w/o need for livelock workaround
*/
#ifndef CONFIG_ARC_STAR_9000923308
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
unsigned int val;
smp_mb();
__asm__ __volatile__(
"1: llock %[val], [%[slock]] \n"
" breq %[val], %[LOCKED], 1b \n" /* spin while LOCKED */
" scond %[LOCKED], [%[slock]] \n" /* acquire */
" bnz 1b \n"
" \n"
: [val] "=&r" (val)
: [slock] "r" (&(lock->slock)),
[LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
: "memory", "cc");
smp_mb();
}
/* 1 - lock taken successfully */
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
unsigned int val, got_it = 0;
smp_mb();
__asm__ __volatile__(
"1: llock %[val], [%[slock]] \n"
" breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */
" scond %[LOCKED], [%[slock]] \n" /* acquire */
" bnz 1b \n"
" mov %[got_it], 1 \n"
"4: \n"
" \n"
: [val] "=&r" (val),
[got_it] "+&r" (got_it)
: [slock] "r" (&(lock->slock)),
[LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
: "memory", "cc");
smp_mb();
return got_it;
}
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
smp_mb();
lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
smp_mb();
}
/*
* Read-write spinlocks, allowing multiple readers but only one writer.
* Unfair locking as Writers could be starved indefinitely by Reader(s)
*/
static inline void arch_read_lock(arch_rwlock_t *rw)
{
unsigned int val;
smp_mb();
/*
* zero means writer holds the lock exclusively, deny Reader.
* Otherwise grant lock to first/subseq reader
*
* if (rw->counter > 0) {
* rw->counter--;
* ret = 1;
* }
*/
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" brls %[val], %[WR_LOCKED], 1b\n" /* <= 0: spin while write locked */
" sub %[val], %[val], 1 \n" /* reader lock */
" scond %[val], [%[rwlock]] \n"
" bnz 1b \n"
" \n"
: [val] "=&r" (val)
: [rwlock] "r" (&(rw->counter)),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
}
/* 1 - lock taken successfully */
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
unsigned int val, got_it = 0;
smp_mb();
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */
" sub %[val], %[val], 1 \n" /* counter-- */
" scond %[val], [%[rwlock]] \n"
" bnz 1b \n" /* retry if collided with someone */
" mov %[got_it], 1 \n"
" \n"
"4: ; --- done --- \n"
: [val] "=&r" (val),
[got_it] "+&r" (got_it)
: [rwlock] "r" (&(rw->counter)),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
return got_it;
}
static inline void arch_write_lock(arch_rwlock_t *rw)
{
unsigned int val;
smp_mb();
/*
* If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
* deny writer. Otherwise if unlocked grant to writer
* Hence the claim that Linux rwlocks are unfair to writers.
* (can be starved for an indefinite time by readers).
*
* if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
* rw->counter = 0;
* ret = 1;
* }
*/
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" brne %[val], %[UNLOCKED], 1b \n" /* while !UNLOCKED spin */
" mov %[val], %[WR_LOCKED] \n"
" scond %[val], [%[rwlock]] \n"
" bnz 1b \n"
" \n"
: [val] "=&r" (val)
: [rwlock] "r" (&(rw->counter)),
[UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
}
/* 1 - lock taken successfully */
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
unsigned int val, got_it = 0;
smp_mb();
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */
" mov %[val], %[WR_LOCKED] \n"
" scond %[val], [%[rwlock]] \n"
" bnz 1b \n" /* retry if collided with someone */
" mov %[got_it], 1 \n"
" \n"
"4: ; --- done --- \n"
: [val] "=&r" (val),
[got_it] "+&r" (got_it)
: [rwlock] "r" (&(rw->counter)),
[UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
return got_it;
}
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
unsigned int val;
smp_mb();
/*
* rw->counter++;
*/
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" add %[val], %[val], 1 \n"
" scond %[val], [%[rwlock]] \n"
" bnz 1b \n"
" \n"
: [val] "=&r" (val)
: [rwlock] "r" (&(rw->counter))
: "memory", "cc");
smp_mb();
}
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
smp_mb();
rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
smp_mb();
}
#else /* CONFIG_ARC_STAR_9000923308 */
/*
* HS38x4 could get into a LLOCK/SCOND livelock in case of multiple overlapping
* coherency transactions in the SCU. The exclusive line state keeps rotating
* among contenting cores leading to a never ending cycle. So break the cycle
* by deferring the retry of failed exclusive access (SCOND). The actual delay
* needed is function of number of contending cores as well as the unrelated
* coherency traffic from other cores. To keep the code simple, start off with
* small delay of 1 which would suffice most cases and in case of contention
* double the delay. Eventually the delay is sufficient such that the coherency
* pipeline is drained, thus a subsequent exclusive access would succeed.
*/
#define SCOND_FAIL_RETRY_VAR_DEF \
unsigned int delay, tmp; \
#define SCOND_FAIL_RETRY_ASM \
" ; --- scond fail delay --- \n" \
" mov %[tmp], %[delay] \n" /* tmp = delay */ \
"2: brne.d %[tmp], 0, 2b \n" /* while (tmp != 0) */ \
" sub %[tmp], %[tmp], 1 \n" /* tmp-- */ \
" rol %[delay], %[delay] \n" /* delay *= 2 */ \
" b 1b \n" /* start over */ \
" \n" \
"4: ; --- done --- \n" \
#define SCOND_FAIL_RETRY_VARS \
,[delay] "=&r" (delay), [tmp] "=&r" (tmp) \
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned int val;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
__asm__ __volatile__(
"0: mov %[delay], 1 \n"
"1: llock %[val], [%[slock]] \n"
" breq %[val], %[LOCKED], 0b \n" /* spin while LOCKED */
" scond %[LOCKED], [%[slock]] \n" /* acquire */
" bz 4f \n" /* done */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val)
SCOND_FAIL_RETRY_VARS
: [slock] "r" (&(lock->slock)),
[LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
: "memory", "cc");
smp_mb();
}
/* 1 - lock taken successfully */
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
unsigned int val, got_it = 0;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
__asm__ __volatile__(
"0: mov %[delay], 1 \n"
"1: llock %[val], [%[slock]] \n"
" breq %[val], %[LOCKED], 4f \n" /* already LOCKED, just bail */
" scond %[LOCKED], [%[slock]] \n" /* acquire */
" bz.d 4f \n"
" mov.z %[got_it], 1 \n" /* got it */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val),
[got_it] "+&r" (got_it)
SCOND_FAIL_RETRY_VARS
: [slock] "r" (&(lock->slock)),
[LOCKED] "r" (__ARCH_SPIN_LOCK_LOCKED__)
: "memory", "cc");
smp_mb();
return got_it;
}
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
smp_mb();
lock->slock = __ARCH_SPIN_LOCK_UNLOCKED__;
smp_mb();
}
/*
* Read-write spinlocks, allowing multiple readers but only one writer.
* Unfair locking as Writers could be starved indefinitely by Reader(s)
*/
static inline void arch_read_lock(arch_rwlock_t *rw)
{
unsigned int val;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
/*
* zero means writer holds the lock exclusively, deny Reader.
* Otherwise grant lock to first/subseq reader
*
* if (rw->counter > 0) {
* rw->counter--;
* ret = 1;
* }
*/
__asm__ __volatile__(
"0: mov %[delay], 1 \n"
"1: llock %[val], [%[rwlock]] \n"
" brls %[val], %[WR_LOCKED], 0b\n" /* <= 0: spin while write locked */
" sub %[val], %[val], 1 \n" /* reader lock */
" scond %[val], [%[rwlock]] \n"
" bz 4f \n" /* done */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val)
SCOND_FAIL_RETRY_VARS
: [rwlock] "r" (&(rw->counter)),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
}
/* 1 - lock taken successfully */
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
unsigned int val, got_it = 0;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
__asm__ __volatile__(
"0: mov %[delay], 1 \n"
"1: llock %[val], [%[rwlock]] \n"
" brls %[val], %[WR_LOCKED], 4f\n" /* <= 0: already write locked, bail */
" sub %[val], %[val], 1 \n" /* counter-- */
" scond %[val], [%[rwlock]] \n"
" bz.d 4f \n"
" mov.z %[got_it], 1 \n" /* got it */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val),
[got_it] "+&r" (got_it)
SCOND_FAIL_RETRY_VARS
: [rwlock] "r" (&(rw->counter)),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
return got_it;
}
static inline void arch_write_lock(arch_rwlock_t *rw)
{
unsigned int val;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
/*
* If reader(s) hold lock (lock < __ARCH_RW_LOCK_UNLOCKED__),
* deny writer. Otherwise if unlocked grant to writer
* Hence the claim that Linux rwlocks are unfair to writers.
* (can be starved for an indefinite time by readers).
*
* if (rw->counter == __ARCH_RW_LOCK_UNLOCKED__) {
* rw->counter = 0;
* ret = 1;
* }
*/
__asm__ __volatile__(
"0: mov %[delay], 1 \n"
"1: llock %[val], [%[rwlock]] \n"
" brne %[val], %[UNLOCKED], 0b \n" /* while !UNLOCKED spin */
" mov %[val], %[WR_LOCKED] \n"
" scond %[val], [%[rwlock]] \n"
" bz 4f \n"
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val)
SCOND_FAIL_RETRY_VARS
: [rwlock] "r" (&(rw->counter)),
[UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
}
/* 1 - lock taken successfully */
static inline int arch_write_trylock(arch_rwlock_t *rw)
{
unsigned int val, got_it = 0;
SCOND_FAIL_RETRY_VAR_DEF;
smp_mb();
__asm__ __volatile__(
"0: mov %[delay], 1 \n"
"1: llock %[val], [%[rwlock]] \n"
" brne %[val], %[UNLOCKED], 4f \n" /* !UNLOCKED, bail */
" mov %[val], %[WR_LOCKED] \n"
" scond %[val], [%[rwlock]] \n"
" bz.d 4f \n"
" mov.z %[got_it], 1 \n" /* got it */
" \n"
SCOND_FAIL_RETRY_ASM
: [val] "=&r" (val),
[got_it] "+&r" (got_it)
SCOND_FAIL_RETRY_VARS
: [rwlock] "r" (&(rw->counter)),
[UNLOCKED] "ir" (__ARCH_RW_LOCK_UNLOCKED__),
[WR_LOCKED] "ir" (0)
: "memory", "cc");
smp_mb();
return got_it;
}
static inline void arch_read_unlock(arch_rwlock_t *rw)
{
unsigned int val;
smp_mb();
/*
* rw->counter++;
*/
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" add %[val], %[val], 1 \n"
" scond %[val], [%[rwlock]] \n"
" bnz 1b \n"
" \n"
: [val] "=&r" (val)
: [rwlock] "r" (&(rw->counter))
: "memory", "cc");
smp_mb();
}
static inline void arch_write_unlock(arch_rwlock_t *rw)
{
unsigned int val;
smp_mb();
/*
* rw->counter = __ARCH_RW_LOCK_UNLOCKED__;
*/
__asm__ __volatile__(
"1: llock %[val], [%[rwlock]] \n"
" scond %[UNLOCKED], [%[rwlock]]\n"
" bnz 1b \n"
" \n"
: [val] "=&r" (val)
: [rwlock] "r" (&(rw->counter)),
[UNLOCKED] "r" (__ARCH_RW_LOCK_UNLOCKED__)
: "memory", "cc");
smp_mb();
}
#undef SCOND_FAIL_RETRY_VAR_DEF
#undef SCOND_FAIL_RETRY_ASM
#undef SCOND_FAIL_RETRY_VARS
#endif /* CONFIG_ARC_STAR_9000923308 */
#else /* !CONFIG_ARC_HAS_LLSC */
static inline void arch_spin_lock(arch_spinlock_t *lock)
{
unsigned int val = __ARCH_SPIN_LOCK_LOCKED__;
/*
* This smp_mb() is technically superfluous, we only need the one
@ -33,7 +542,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
__asm__ __volatile__(
"1: ex %0, [%1] \n"
" breq %0, %2, 1b \n"
: "+&r" (tmp)
: "+&r" (val)
: "r"(&(lock->slock)), "ir"(__ARCH_SPIN_LOCK_LOCKED__)
: "memory");
@ -48,26 +557,27 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
smp_mb();
}
/* 1 - lock taken successfully */
static inline int arch_spin_trylock(arch_spinlock_t *lock)
{
unsigned int tmp = __ARCH_SPIN_LOCK_LOCKED__;
unsigned int val = __ARCH_SPIN_LOCK_LOCKED__;
smp_mb();
__asm__ __volatile__(
"1: ex %0, [%1] \n"
: "+r" (tmp)
: "+r" (val)
: "r"(&(lock->slock))
: "memory");
smp_mb();
return (tmp == __ARCH_SPIN_LOCK_UNLOCKED__);
return (val == __ARCH_SPIN_LOCK_UNLOCKED__);
}
static inline void arch_spin_unlock(arch_spinlock_t *lock)
{
unsigned int tmp = __ARCH_SPIN_LOCK_UNLOCKED__;
unsigned int val = __ARCH_SPIN_LOCK_UNLOCKED__;
/*
* RELEASE barrier: given the instructions avail on ARCv2, full barrier
@ -77,7 +587,7 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
__asm__ __volatile__(
" ex %0, [%1] \n"
: "+r" (tmp)
: "+r" (val)
: "r"(&(lock->slock))
: "memory");
@ -90,19 +600,12 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock)
/*
* Read-write spinlocks, allowing multiple readers but only one writer.
* Unfair locking as Writers could be starved indefinitely by Reader(s)
*
* The spinlock itself is contained in @counter and access to it is
* serialized with @lock_mutex.
*
* Unfair locking as Writers could be starved indefinitely by Reader(s)
*/
/* Would read_trylock() succeed? */
#define arch_read_can_lock(x) ((x)->counter > 0)
/* Would write_trylock() succeed? */
#define arch_write_can_lock(x) ((x)->counter == __ARCH_RW_LOCK_UNLOCKED__)
/* 1 - lock taken successfully */
static inline int arch_read_trylock(arch_rwlock_t *rw)
{
@ -173,6 +676,11 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
arch_spin_unlock(&(rw->lock_mutex));
}
#endif
#define arch_read_can_lock(x) ((x)->counter > 0)
#define arch_write_can_lock(x) ((x)->counter == __ARCH_RW_LOCK_UNLOCKED__)
#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)

View File

@ -26,7 +26,9 @@ typedef struct {
*/
typedef struct {
volatile unsigned int counter;
#ifndef CONFIG_ARC_HAS_LLSC
arch_spinlock_t lock_mutex;
#endif
} arch_rwlock_t;
#define __ARCH_RW_LOCK_UNLOCKED__ 0x01000000

View File

@ -32,20 +32,20 @@
*/
struct user_regs_struct {
long pad;
unsigned long pad;
struct {
long bta, lp_start, lp_end, lp_count;
long status32, ret, blink, fp, gp;
long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
long sp;
unsigned long bta, lp_start, lp_end, lp_count;
unsigned long status32, ret, blink, fp, gp;
unsigned long r12, r11, r10, r9, r8, r7, r6, r5, r4, r3, r2, r1, r0;
unsigned long sp;
} scratch;
long pad2;
unsigned long pad2;
struct {
long r25, r24, r23, r22, r21, r20;
long r19, r18, r17, r16, r15, r14, r13;
unsigned long r25, r24, r23, r22, r21, r20;
unsigned long r19, r18, r17, r16, r15, r14, r13;
} callee;
long efa; /* break pt addr, for break points in delay slots */
long stop_pc; /* give dbg stop_pc after ensuring brkpt trap */
unsigned long efa; /* break pt addr, for break points in delay slots */
unsigned long stop_pc; /* give dbg stop_pc after ensuring brkpt trap */
};
#endif /* !__ASSEMBLY__ */

View File

@ -47,6 +47,7 @@ static void read_arc_build_cfg_regs(void)
struct bcr_perip uncached_space;
struct bcr_generic bcr;
struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()];
unsigned long perip_space;
FIX_PTR(cpu);
READ_BCR(AUX_IDENTITY, cpu->core);
@ -56,7 +57,12 @@ static void read_arc_build_cfg_regs(void)
cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
BUG_ON((uncached_space.start << 24) != ARC_UNCACHED_ADDR_SPACE);
if (uncached_space.ver < 3)
perip_space = uncached_space.start << 24;
else
perip_space = read_aux_reg(AUX_NON_VOL) & 0xF0000000;
BUG_ON(perip_space != ARC_UNCACHED_ADDR_SPACE);
READ_BCR(ARC_REG_MUL_BCR, cpu->extn_mpy);
@ -330,6 +336,10 @@ static void arc_chk_core_config(void)
pr_warn("CONFIG_ARC_FPU_SAVE_RESTORE needed for working apps\n");
else if (!cpu->extn.fpu_dp && fpu_enabled)
panic("FPU non-existent, disable CONFIG_ARC_FPU_SAVE_RESTORE\n");
if (is_isa_arcv2() && IS_ENABLED(CONFIG_SMP) && cpu->isa.atomic &&
!IS_ENABLED(CONFIG_ARC_STAR_9000923308))
panic("llock/scond livelock workaround missing\n");
}
/*

View File

@ -203,34 +203,24 @@ static int arc_clkevent_set_next_event(unsigned long delta,
return 0;
}
static void arc_clkevent_set_mode(enum clock_event_mode mode,
struct clock_event_device *dev)
static int arc_clkevent_set_periodic(struct clock_event_device *dev)
{
switch (mode) {
case CLOCK_EVT_MODE_PERIODIC:
/*
* At X Hz, 1 sec = 1000ms -> X cycles;
* 10ms -> X / 100 cycles
*/
arc_timer_event_setup(arc_get_core_freq() / HZ);
break;
case CLOCK_EVT_MODE_ONESHOT:
break;
default:
break;
}
return;
/*
* At X Hz, 1 sec = 1000ms -> X cycles;
* 10ms -> X / 100 cycles
*/
arc_timer_event_setup(arc_get_core_freq() / HZ);
return 0;
}
static DEFINE_PER_CPU(struct clock_event_device, arc_clockevent_device) = {
.name = "ARC Timer0",
.features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
.mode = CLOCK_EVT_MODE_UNUSED,
.rating = 300,
.irq = TIMER0_IRQ, /* hardwired, no need for resources */
.set_next_event = arc_clkevent_set_next_event,
.set_mode = arc_clkevent_set_mode,
.name = "ARC Timer0",
.features = CLOCK_EVT_FEAT_ONESHOT |
CLOCK_EVT_FEAT_PERIODIC,
.rating = 300,
.irq = TIMER0_IRQ, /* hardwired, no need for resources */
.set_next_event = arc_clkevent_set_next_event,
.set_state_periodic = arc_clkevent_set_periodic,
};
static irqreturn_t timer_irq_handler(int irq, void *dev_id)
@ -240,7 +230,7 @@ static irqreturn_t timer_irq_handler(int irq, void *dev_id)
* irq_set_chip_and_handler() asked for handle_percpu_devid_irq()
*/
struct clock_event_device *evt = this_cpu_ptr(&arc_clockevent_device);
int irq_reenable = evt->mode == CLOCK_EVT_MODE_PERIODIC;
int irq_reenable = clockevent_state_periodic(evt);
/*
* Any write to CTRL reg ACks the interrupt, we rewrite the

View File

@ -206,7 +206,7 @@ unalignedOffby3:
ld.ab r6, [r1, 4]
prefetch [r1, 28] ;Prefetch the next read location
ld.ab r8, [r1,4]
prefetch [r3, 32] ;Prefetch the next write location
prefetchw [r3, 32] ;Prefetch the next write location
SHIFT_1 (r7, r6, 8)
or r7, r7, r5

View File

@ -10,12 +10,6 @@
#undef PREALLOC_NOT_AVAIL
#ifdef PREALLOC_NOT_AVAIL
#define PREWRITE(A,B) prefetchw [(A),(B)]
#else
#define PREWRITE(A,B) prealloc [(A),(B)]
#endif
ENTRY(memset)
prefetchw [r0] ; Prefetch the write location
mov.f 0, r2
@ -51,9 +45,15 @@ ENTRY(memset)
;;; Convert len to Dwords, unfold x8
lsr.f lp_count, lp_count, 6
lpnz @.Lset64bytes
;; LOOP START
PREWRITE(r3, 64) ;Prefetch the next write location
#ifdef PREALLOC_NOT_AVAIL
prefetchw [r3, 64] ;Prefetch the next write location
#else
prealloc [r3, 64]
#endif
#ifdef CONFIG_ARC_HAS_LL64
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]
@ -62,16 +62,45 @@ ENTRY(memset)
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]
#else
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
#endif
.Lset64bytes:
lsr.f lp_count, r2, 5 ;Last remaining max 124 bytes
lpnz .Lset32bytes
;; LOOP START
prefetchw [r3, 32] ;Prefetch the next write location
#ifdef CONFIG_ARC_HAS_LL64
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]
std.ab r4, [r3, 8]
#else
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
st.ab r4, [r3, 4]
#endif
.Lset32bytes:
and.f lp_count, r2, 0x1F ;Last remaining 31 bytes

View File

@ -389,6 +389,21 @@ axs103_set_freq(unsigned int id, unsigned int fd, unsigned int od)
static void __init axs103_early_init(void)
{
/*
* AXS103 configurations for SMP/QUAD configurations share device tree
* which defaults to 90 MHz. However recent failures of Quad config
* revealed P&R timing violations so clamp it down to safe 50 MHz
* Instead of duplicating defconfig/DT for SMP/QUAD, add a small hack
*
* This hack is really hacky as of now. Fix it properly by getting the
* number of cores as return value of platform's early SMP callback
*/
#ifdef CONFIG_ARC_MCIP
unsigned int num_cores = (read_aux_reg(ARC_REG_MCIP_BCR) >> 16) & 0x3F;
if (num_cores > 2)
arc_set_core_freq(50 * 1000000);
#endif
switch (arc_get_core_freq()/1000000) {
case 33:
axs103_set_freq(1, 1, 1);