Merge branch 'lkmm-for-mingo' of git://git.kernel.org/pub/scm/linux/kernel/git/paulmck/linux-rcu into locking/core
Pull various memory-model (LKMM) updates from Paul E. McKenney. Signed-off-by: Ingo Molnar <mingo@kernel.org>
This commit is contained in:
commit
f7c2b7477b
|
@ -56,6 +56,23 @@ Barriers:
|
|||
smp_mb__{before,after}_atomic()
|
||||
|
||||
|
||||
TYPES (signed vs unsigned)
|
||||
-----
|
||||
|
||||
While atomic_t, atomic_long_t and atomic64_t use int, long and s64
|
||||
respectively (for hysterical raisins), the kernel uses -fno-strict-overflow
|
||||
(which implies -fwrapv) and defines signed overflow to behave like
|
||||
2s-complement.
|
||||
|
||||
Therefore, an explicitly unsigned variant of the atomic ops is strictly
|
||||
unnecessary and we can simply cast; there is no UB.
|
||||
|
||||
There was a bug in UBSAN prior to GCC-8 that would generate UB warnings for
|
||||
signed types.
|
||||
|
||||
With this we also conform to the C/C++ _Atomic behaviour and things like
|
||||
P1236R1.
|
||||
|
||||
|
||||
SEMANTICS
|
||||
---------
|
||||
|
|
|
@ -493,10 +493,8 @@ CPU 에게 기대할 수 있는 최소한의 보장사항 몇가지가 있습니
|
|||
이 타입의 오퍼레이션은 단방향의 투과성 배리어처럼 동작합니다. ACQUIRE
|
||||
오퍼레이션 뒤의 모든 메모리 오퍼레이션들이 ACQUIRE 오퍼레이션 후에
|
||||
일어난 것으로 시스템의 나머지 컴포넌트들에 보이게 될 것이 보장됩니다.
|
||||
LOCK 오퍼레이션과 smp_load_acquire(), smp_cond_acquire() 오퍼레이션도
|
||||
ACQUIRE 오퍼레이션에 포함됩니다. smp_cond_acquire() 오퍼레이션은 컨트롤
|
||||
의존성과 smp_rmb() 를 사용해서 ACQUIRE 의 의미적 요구사항(semantic)을
|
||||
충족시킵니다.
|
||||
LOCK 오퍼레이션과 smp_load_acquire(), smp_cond_load_acquire() 오퍼레이션도
|
||||
ACQUIRE 오퍼레이션에 포함됩니다.
|
||||
|
||||
ACQUIRE 오퍼레이션 앞의 메모리 오퍼레이션들은 ACQUIRE 오퍼레이션 완료 후에
|
||||
수행된 것처럼 보일 수 있습니다.
|
||||
|
@ -2146,33 +2144,40 @@ set_current_state() 는 다음의 것들로 감싸질 수도 있습니다:
|
|||
event_indicated = 1;
|
||||
wake_up_process(event_daemon);
|
||||
|
||||
wake_up() 류에 의해 쓰기 메모리 배리어가 내포됩니다. 만약 그것들이 뭔가를
|
||||
깨운다면요. 이 배리어는 태스크 상태가 지워지기 전에 수행되므로, 이벤트를
|
||||
알리기 위한 STORE 와 태스크 상태를 TASK_RUNNING 으로 설정하는 STORE 사이에
|
||||
위치하게 됩니다.
|
||||
wake_up() 이 무언가를 깨우게 되면, 이 함수는 범용 메모리 배리어를 수행합니다.
|
||||
이 함수가 아무것도 깨우지 않는다면 메모리 배리어는 수행될 수도, 수행되지 않을
|
||||
수도 있습니다; 이 경우에 메모리 배리어를 수행할 거라 오해해선 안됩니다. 이
|
||||
배리어는 태스크 상태가 접근되기 전에 수행되는데, 자세히 말하면 이 이벤트를
|
||||
알리기 위한 STORE 와 TASK_RUNNING 으로 상태를 쓰는 STORE 사이에 수행됩니다:
|
||||
|
||||
CPU 1 CPU 2
|
||||
CPU 1 (Sleeper) CPU 2 (Waker)
|
||||
=============================== ===============================
|
||||
set_current_state(); STORE event_indicated
|
||||
smp_store_mb(); wake_up();
|
||||
STORE current->state <쓰기 배리어>
|
||||
<범용 배리어> STORE current->state
|
||||
LOAD event_indicated
|
||||
STORE current->state ...
|
||||
<범용 배리어> <범용 배리어>
|
||||
LOAD event_indicated if ((LOAD task->state) & TASK_NORMAL)
|
||||
STORE task->state
|
||||
|
||||
한번더 말합니다만, 이 쓰기 메모리 배리어는 이 코드가 정말로 뭔가를 깨울 때에만
|
||||
실행됩니다. 이걸 설명하기 위해, X 와 Y 는 모두 0 으로 초기화 되어 있다는 가정
|
||||
하에 아래의 이벤트 시퀀스를 생각해 봅시다:
|
||||
여기서 "task" 는 깨어나지는 쓰레드이고 CPU 1 의 "current" 와 같습니다.
|
||||
|
||||
반복하지만, wake_up() 이 무언가를 정말 깨운다면 범용 메모리 배리어가 수행될
|
||||
것이 보장되지만, 그렇지 않다면 그런 보장이 없습니다. 이걸 이해하기 위해, X 와
|
||||
Y 는 모두 0 으로 초기화 되어 있다는 가정 하에 아래의 이벤트 시퀀스를 생각해
|
||||
봅시다:
|
||||
|
||||
CPU 1 CPU 2
|
||||
=============================== ===============================
|
||||
X = 1; STORE event_indicated
|
||||
X = 1; Y = 1;
|
||||
smp_mb(); wake_up();
|
||||
Y = 1; wait_event(wq, Y == 1);
|
||||
wake_up(); load from Y sees 1, no memory barrier
|
||||
load from X might see 0
|
||||
LOAD Y LOAD X
|
||||
|
||||
위 예제에서의 경우와 달리 깨우기가 정말로 행해졌다면, CPU 2 의 X 로드는 1 을
|
||||
본다고 보장될 수 있을 겁니다.
|
||||
정말로 깨우기가 행해졌다면, 두 로드 중 (최소한) 하나는 1 을 보게 됩니다.
|
||||
반면에, 실제 깨우기가 행해지지 않았다면, 두 로드 모두 0 을 볼 수도 있습니다.
|
||||
|
||||
wake_up_process() 는 항상 범용 메모리 배리어를 수행합니다. 이 배리어 역시
|
||||
태스크 상태가 접근되기 전에 수행됩니다. 특히, 앞의 예제 코드에서 wake_up() 이
|
||||
wake_up_process() 로 대체된다면 두 로드 중 하나는 1 을 볼 것이 보장됩니다.
|
||||
|
||||
사용 가능한 깨우기류 함수들로 다음과 같은 것들이 있습니다:
|
||||
|
||||
|
@ -2192,6 +2197,8 @@ wake_up() 류에 의해 쓰기 메모리 배리어가 내포됩니다. 만약
|
|||
wake_up_poll();
|
||||
wake_up_process();
|
||||
|
||||
메모리 순서규칙 관점에서, 이 함수들은 모두 wake_up() 과 같거나 보다 강한 순서
|
||||
보장을 제공합니다.
|
||||
|
||||
[!] 잠재우는 코드와 깨우는 코드에 내포되는 메모리 배리어들은 깨우기 전에
|
||||
이루어진 스토어를 잠재우는 코드가 set_current_state() 를 호출한 후에 행하는
|
||||
|
|
|
@ -27,7 +27,7 @@ Explanation of the Linux-Kernel Memory Consistency Model
|
|||
19. AND THEN THERE WAS ALPHA
|
||||
20. THE HAPPENS-BEFORE RELATION: hb
|
||||
21. THE PROPAGATES-BEFORE RELATION: pb
|
||||
22. RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
|
||||
22. RCU RELATIONS: rcu-link, rcu-gp, rcu-rscsi, rcu-fence, and rb
|
||||
23. LOCKING
|
||||
24. ODDS AND ENDS
|
||||
|
||||
|
@ -1430,8 +1430,8 @@ they execute means that it cannot have cycles. This requirement is
|
|||
the content of the LKMM's "propagation" axiom.
|
||||
|
||||
|
||||
RCU RELATIONS: rcu-link, gp, rscs, rcu-fence, and rb
|
||||
----------------------------------------------------
|
||||
RCU RELATIONS: rcu-link, rcu-gp, rcu-rscsi, rcu-fence, and rb
|
||||
-------------------------------------------------------------
|
||||
|
||||
RCU (Read-Copy-Update) is a powerful synchronization mechanism. It
|
||||
rests on two concepts: grace periods and read-side critical sections.
|
||||
|
@ -1446,17 +1446,19 @@ As far as memory models are concerned, RCU's main feature is its
|
|||
Grace-Period Guarantee, which states that a critical section can never
|
||||
span a full grace period. In more detail, the Guarantee says:
|
||||
|
||||
If a critical section starts before a grace period then it
|
||||
must end before the grace period does. In addition, every
|
||||
store that propagates to the critical section's CPU before the
|
||||
end of the critical section must propagate to every CPU before
|
||||
the end of the grace period.
|
||||
For any critical section C and any grace period G, at least
|
||||
one of the following statements must hold:
|
||||
|
||||
If a critical section ends after a grace period ends then it
|
||||
must start after the grace period does. In addition, every
|
||||
store that propagates to the grace period's CPU before the
|
||||
start of the grace period must propagate to every CPU before
|
||||
the start of the critical section.
|
||||
(1) C ends before G does, and in addition, every store that
|
||||
propagates to C's CPU before the end of C must propagate to
|
||||
every CPU before G ends.
|
||||
|
||||
(2) G starts before C does, and in addition, every store that
|
||||
propagates to G's CPU before the start of G must propagate
|
||||
to every CPU before C starts.
|
||||
|
||||
In particular, it is not possible for a critical section to both start
|
||||
before and end after a grace period.
|
||||
|
||||
Here is a simple example of RCU in action:
|
||||
|
||||
|
@ -1483,10 +1485,11 @@ The Grace Period Guarantee tells us that when this code runs, it will
|
|||
never end with r1 = 1 and r2 = 0. The reasoning is as follows. r1 = 1
|
||||
means that P0's store to x propagated to P1 before P1 called
|
||||
synchronize_rcu(), so P0's critical section must have started before
|
||||
P1's grace period. On the other hand, r2 = 0 means that P0's store to
|
||||
y, which occurs before the end of the critical section, did not
|
||||
propagate to P1 before the end of the grace period, violating the
|
||||
Guarantee.
|
||||
P1's grace period, contrary to part (2) of the Guarantee. On the
|
||||
other hand, r2 = 0 means that P0's store to y, which occurs before the
|
||||
end of the critical section, did not propagate to P1 before the end of
|
||||
the grace period, contrary to part (1). Together the results violate
|
||||
the Guarantee.
|
||||
|
||||
In the kernel's implementations of RCU, the requirements for stores
|
||||
to propagate to every CPU are fulfilled by placing strong fences at
|
||||
|
@ -1504,11 +1507,11 @@ before" or "ends after" a grace period? Some aspects of the meaning
|
|||
are pretty obvious, as in the example above, but the details aren't
|
||||
entirely clear. The LKMM formalizes this notion by means of the
|
||||
rcu-link relation. rcu-link encompasses a very general notion of
|
||||
"before": Among other things, X ->rcu-link Z includes cases where X
|
||||
happens-before or is equal to some event Y which is equal to or comes
|
||||
before Z in the coherence order. When Y = Z this says that X ->rfe Z
|
||||
implies X ->rcu-link Z. In addition, when Y = X it says that X ->fr Z
|
||||
and X ->co Z each imply X ->rcu-link Z.
|
||||
"before": If E and F are RCU fence events (i.e., rcu_read_lock(),
|
||||
rcu_read_unlock(), or synchronize_rcu()) then among other things,
|
||||
E ->rcu-link F includes cases where E is po-before some memory-access
|
||||
event X, F is po-after some memory-access event Y, and we have any of
|
||||
X ->rfe Y, X ->co Y, or X ->fr Y.
|
||||
|
||||
The formal definition of the rcu-link relation is more than a little
|
||||
obscure, and we won't give it here. It is closely related to the pb
|
||||
|
@ -1516,171 +1519,173 @@ relation, and the details don't matter unless you want to comb through
|
|||
a somewhat lengthy formal proof. Pretty much all you need to know
|
||||
about rcu-link is the information in the preceding paragraph.
|
||||
|
||||
The LKMM also defines the gp and rscs relations. They bring grace
|
||||
periods and read-side critical sections into the picture, in the
|
||||
The LKMM also defines the rcu-gp and rcu-rscsi relations. They bring
|
||||
grace periods and read-side critical sections into the picture, in the
|
||||
following way:
|
||||
|
||||
E ->gp F means there is a synchronize_rcu() fence event S such
|
||||
that E ->po S and either S ->po F or S = F. In simple terms,
|
||||
there is a grace period po-between E and F.
|
||||
E ->rcu-gp F means that E and F are in fact the same event,
|
||||
and that event is a synchronize_rcu() fence (i.e., a grace
|
||||
period).
|
||||
|
||||
E ->rscs F means there is a critical section delimited by an
|
||||
rcu_read_lock() fence L and an rcu_read_unlock() fence U, such
|
||||
that E ->po U and either L ->po F or L = F. You can think of
|
||||
this as saying that E and F are in the same critical section
|
||||
(in fact, it also allows E to be po-before the start of the
|
||||
critical section and F to be po-after the end).
|
||||
E ->rcu-rscsi F means that E and F are the rcu_read_unlock()
|
||||
and rcu_read_lock() fence events delimiting some read-side
|
||||
critical section. (The 'i' at the end of the name emphasizes
|
||||
that this relation is "inverted": It links the end of the
|
||||
critical section to the start.)
|
||||
|
||||
If we think of the rcu-link relation as standing for an extended
|
||||
"before", then X ->gp Y ->rcu-link Z says that X executes before a
|
||||
grace period which ends before Z executes. (In fact it covers more
|
||||
than this, because it also includes cases where X executes before a
|
||||
grace period and some store propagates to Z's CPU before Z executes
|
||||
but doesn't propagate to some other CPU until after the grace period
|
||||
ends.) Similarly, X ->rscs Y ->rcu-link Z says that X is part of (or
|
||||
before the start of) a critical section which starts before Z
|
||||
executes.
|
||||
"before", then X ->rcu-gp Y ->rcu-link Z roughly says that X is a
|
||||
grace period which ends before Z begins. (In fact it covers more than
|
||||
this, because it also includes cases where some store propagates to
|
||||
Z's CPU before Z begins but doesn't propagate to some other CPU until
|
||||
after X ends.) Similarly, X ->rcu-rscsi Y ->rcu-link Z says that X is
|
||||
the end of a critical section which starts before Z begins.
|
||||
|
||||
The LKMM goes on to define the rcu-fence relation as a sequence of gp
|
||||
and rscs links separated by rcu-link links, in which the number of gp
|
||||
links is >= the number of rscs links. For example:
|
||||
The LKMM goes on to define the rcu-fence relation as a sequence of
|
||||
rcu-gp and rcu-rscsi links separated by rcu-link links, in which the
|
||||
number of rcu-gp links is >= the number of rcu-rscsi links. For
|
||||
example:
|
||||
|
||||
X ->gp Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V
|
||||
X ->rcu-gp Y ->rcu-link Z ->rcu-rscsi T ->rcu-link U ->rcu-gp V
|
||||
|
||||
would imply that X ->rcu-fence V, because this sequence contains two
|
||||
gp links and only one rscs link. (It also implies that X ->rcu-fence T
|
||||
and Z ->rcu-fence V.) On the other hand:
|
||||
rcu-gp links and one rcu-rscsi link. (It also implies that
|
||||
X ->rcu-fence T and Z ->rcu-fence V.) On the other hand:
|
||||
|
||||
X ->rscs Y ->rcu-link Z ->rscs T ->rcu-link U ->gp V
|
||||
X ->rcu-rscsi Y ->rcu-link Z ->rcu-rscsi T ->rcu-link U ->rcu-gp V
|
||||
|
||||
does not imply X ->rcu-fence V, because the sequence contains only
|
||||
one gp link but two rscs links.
|
||||
one rcu-gp link but two rcu-rscsi links.
|
||||
|
||||
The rcu-fence relation is important because the Grace Period Guarantee
|
||||
means that rcu-fence acts kind of like a strong fence. In particular,
|
||||
if W is a write and we have W ->rcu-fence Z, the Guarantee says that W
|
||||
will propagate to every CPU before Z executes.
|
||||
E ->rcu-fence F implies not only that E begins before F ends, but also
|
||||
that any write po-before E will propagate to every CPU before any
|
||||
instruction po-after F can execute. (However, it does not imply that
|
||||
E must execute before F; in fact, each synchronize_rcu() fence event
|
||||
is linked to itself by rcu-fence as a degenerate case.)
|
||||
|
||||
To prove this in full generality requires some intellectual effort.
|
||||
We'll consider just a very simple case:
|
||||
|
||||
W ->gp X ->rcu-link Y ->rscs Z.
|
||||
G ->rcu-gp W ->rcu-link Z ->rcu-rscsi F.
|
||||
|
||||
This formula means that there is a grace period G and a critical
|
||||
section C such that:
|
||||
This formula means that G and W are the same event (a grace period),
|
||||
and there are events X, Y and a read-side critical section C such that:
|
||||
|
||||
1. W is po-before G;
|
||||
1. G = W is po-before or equal to X;
|
||||
|
||||
2. X is equal to or po-after G;
|
||||
2. X comes "before" Y in some sense (including rfe, co and fr);
|
||||
|
||||
3. X comes "before" Y in some sense;
|
||||
3. Y is po-before Z;
|
||||
|
||||
4. Y is po-before the end of C;
|
||||
4. Z is the rcu_read_unlock() event marking the end of C;
|
||||
|
||||
5. Z is equal to or po-after the start of C.
|
||||
5. F is the rcu_read_lock() event marking the start of C.
|
||||
|
||||
From 2 - 4 we deduce that the grace period G ends before the critical
|
||||
section C. Then the second part of the Grace Period Guarantee says
|
||||
not only that G starts before C does, but also that W (which executes
|
||||
on G's CPU before G starts) must propagate to every CPU before C
|
||||
starts. In particular, W propagates to every CPU before Z executes
|
||||
(or finishes executing, in the case where Z is equal to the
|
||||
rcu_read_lock() fence event which starts C.) This sort of reasoning
|
||||
can be expanded to handle all the situations covered by rcu-fence.
|
||||
From 1 - 4 we deduce that the grace period G ends before the critical
|
||||
section C. Then part (2) of the Grace Period Guarantee says not only
|
||||
that G starts before C does, but also that any write which executes on
|
||||
G's CPU before G starts must propagate to every CPU before C starts.
|
||||
In particular, the write propagates to every CPU before F finishes
|
||||
executing and hence before any instruction po-after F can execute.
|
||||
This sort of reasoning can be extended to handle all the situations
|
||||
covered by rcu-fence.
|
||||
|
||||
Finally, the LKMM defines the RCU-before (rb) relation in terms of
|
||||
rcu-fence. This is done in essentially the same way as the pb
|
||||
relation was defined in terms of strong-fence. We will omit the
|
||||
details; the end result is that E ->rb F implies E must execute before
|
||||
F, just as E ->pb F does (and for much the same reasons).
|
||||
details; the end result is that E ->rb F implies E must execute
|
||||
before F, just as E ->pb F does (and for much the same reasons).
|
||||
|
||||
Putting this all together, the LKMM expresses the Grace Period
|
||||
Guarantee by requiring that the rb relation does not contain a cycle.
|
||||
Equivalently, this "rcu" axiom requires that there are no events E and
|
||||
F with E ->rcu-link F ->rcu-fence E. Or to put it a third way, the
|
||||
axiom requires that there are no cycles consisting of gp and rscs
|
||||
alternating with rcu-link, where the number of gp links is >= the
|
||||
number of rscs links.
|
||||
Equivalently, this "rcu" axiom requires that there are no events E
|
||||
and F with E ->rcu-link F ->rcu-fence E. Or to put it a third way,
|
||||
the axiom requires that there are no cycles consisting of rcu-gp and
|
||||
rcu-rscsi alternating with rcu-link, where the number of rcu-gp links
|
||||
is >= the number of rcu-rscsi links.
|
||||
|
||||
Justifying the axiom isn't easy, but it is in fact a valid
|
||||
formalization of the Grace Period Guarantee. We won't attempt to go
|
||||
through the detailed argument, but the following analysis gives a
|
||||
taste of what is involved. Suppose we have a violation of the first
|
||||
part of the Guarantee: A critical section starts before a grace
|
||||
period, and some store propagates to the critical section's CPU before
|
||||
the end of the critical section but doesn't propagate to some other
|
||||
CPU until after the end of the grace period.
|
||||
taste of what is involved. Suppose both parts of the Guarantee are
|
||||
violated: A critical section starts before a grace period, and some
|
||||
store propagates to the critical section's CPU before the end of the
|
||||
critical section but doesn't propagate to some other CPU until after
|
||||
the end of the grace period.
|
||||
|
||||
Putting symbols to these ideas, let L and U be the rcu_read_lock() and
|
||||
rcu_read_unlock() fence events delimiting the critical section in
|
||||
question, and let S be the synchronize_rcu() fence event for the grace
|
||||
period. Saying that the critical section starts before S means there
|
||||
are events E and F where E is po-after L (which marks the start of the
|
||||
critical section), E is "before" F in the sense of the rcu-link
|
||||
relation, and F is po-before the grace period S:
|
||||
are events Q and R where Q is po-after L (which marks the start of the
|
||||
critical section), Q is "before" R in the sense used by the rcu-link
|
||||
relation, and R is po-before the grace period S. Thus we have:
|
||||
|
||||
L ->po E ->rcu-link F ->po S.
|
||||
L ->rcu-link S.
|
||||
|
||||
Let W be the store mentioned above, let Z come before the end of the
|
||||
Let W be the store mentioned above, let Y come before the end of the
|
||||
critical section and witness that W propagates to the critical
|
||||
section's CPU by reading from W, and let Y on some arbitrary CPU be a
|
||||
witness that W has not propagated to that CPU, where Y happens after
|
||||
section's CPU by reading from W, and let Z on some arbitrary CPU be a
|
||||
witness that W has not propagated to that CPU, where Z happens after
|
||||
some event X which is po-after S. Symbolically, this amounts to:
|
||||
|
||||
S ->po X ->hb* Y ->fr W ->rf Z ->po U.
|
||||
S ->po X ->hb* Z ->fr W ->rf Y ->po U.
|
||||
|
||||
The fr link from Y to W indicates that W has not propagated to Y's CPU
|
||||
at the time that Y executes. From this, it can be shown (see the
|
||||
discussion of the rcu-link relation earlier) that X and Z are related
|
||||
by rcu-link, yielding:
|
||||
The fr link from Z to W indicates that W has not propagated to Z's CPU
|
||||
at the time that Z executes. From this, it can be shown (see the
|
||||
discussion of the rcu-link relation earlier) that S and U are related
|
||||
by rcu-link:
|
||||
|
||||
S ->po X ->rcu-link Z ->po U.
|
||||
S ->rcu-link U.
|
||||
|
||||
The formulas say that S is po-between F and X, hence F ->gp X. They
|
||||
also say that Z comes before the end of the critical section and E
|
||||
comes after its start, hence Z ->rscs E. From all this we obtain:
|
||||
Since S is a grace period we have S ->rcu-gp S, and since L and U are
|
||||
the start and end of the critical section C we have U ->rcu-rscsi L.
|
||||
From this we obtain:
|
||||
|
||||
F ->gp X ->rcu-link Z ->rscs E ->rcu-link F,
|
||||
S ->rcu-gp S ->rcu-link U ->rcu-rscsi L ->rcu-link S,
|
||||
|
||||
a forbidden cycle. Thus the "rcu" axiom rules out this violation of
|
||||
the Grace Period Guarantee.
|
||||
|
||||
For something a little more down-to-earth, let's see how the axiom
|
||||
works out in practice. Consider the RCU code example from above, this
|
||||
time with statement labels added to the memory access instructions:
|
||||
time with statement labels added:
|
||||
|
||||
int x, y;
|
||||
|
||||
P0()
|
||||
{
|
||||
rcu_read_lock();
|
||||
W: WRITE_ONCE(x, 1);
|
||||
X: WRITE_ONCE(y, 1);
|
||||
rcu_read_unlock();
|
||||
L: rcu_read_lock();
|
||||
X: WRITE_ONCE(x, 1);
|
||||
Y: WRITE_ONCE(y, 1);
|
||||
U: rcu_read_unlock();
|
||||
}
|
||||
|
||||
P1()
|
||||
{
|
||||
int r1, r2;
|
||||
|
||||
Y: r1 = READ_ONCE(x);
|
||||
synchronize_rcu();
|
||||
Z: r2 = READ_ONCE(y);
|
||||
Z: r1 = READ_ONCE(x);
|
||||
S: synchronize_rcu();
|
||||
W: r2 = READ_ONCE(y);
|
||||
}
|
||||
|
||||
|
||||
If r2 = 0 at the end then P0's store at X overwrites the value that
|
||||
P1's load at Z reads from, so we have Z ->fre X and thus Z ->rcu-link X.
|
||||
In addition, there is a synchronize_rcu() between Y and Z, so therefore
|
||||
we have Y ->gp Z.
|
||||
If r2 = 0 at the end then P0's store at Y overwrites the value that
|
||||
P1's load at W reads from, so we have W ->fre Y. Since S ->po W and
|
||||
also Y ->po U, we get S ->rcu-link U. In addition, S ->rcu-gp S
|
||||
because S is a grace period.
|
||||
|
||||
If r1 = 1 at the end then P1's load at Y reads from P0's store at W,
|
||||
so we have W ->rcu-link Y. In addition, W and X are in the same critical
|
||||
section, so therefore we have X ->rscs W.
|
||||
If r1 = 1 at the end then P1's load at Z reads from P0's store at X,
|
||||
so we have X ->rfe Z. Together with L ->po X and Z ->po S, this
|
||||
yields L ->rcu-link S. And since L and U are the start and end of a
|
||||
critical section, we have U ->rcu-rscsi L.
|
||||
|
||||
Then X ->rscs W ->rcu-link Y ->gp Z ->rcu-link X is a forbidden cycle,
|
||||
violating the "rcu" axiom. Hence the outcome is not allowed by the
|
||||
LKMM, as we would expect.
|
||||
Then U ->rcu-rscsi L ->rcu-link S ->rcu-gp S ->rcu-link U is a
|
||||
forbidden cycle, violating the "rcu" axiom. Hence the outcome is not
|
||||
allowed by the LKMM, as we would expect.
|
||||
|
||||
For contrast, let's see what can happen in a more complicated example:
|
||||
|
||||
|
@ -1690,51 +1695,52 @@ For contrast, let's see what can happen in a more complicated example:
|
|||
{
|
||||
int r0;
|
||||
|
||||
rcu_read_lock();
|
||||
W: r0 = READ_ONCE(x);
|
||||
X: WRITE_ONCE(y, 1);
|
||||
rcu_read_unlock();
|
||||
L0: rcu_read_lock();
|
||||
r0 = READ_ONCE(x);
|
||||
WRITE_ONCE(y, 1);
|
||||
U0: rcu_read_unlock();
|
||||
}
|
||||
|
||||
P1()
|
||||
{
|
||||
int r1;
|
||||
|
||||
Y: r1 = READ_ONCE(y);
|
||||
synchronize_rcu();
|
||||
Z: WRITE_ONCE(z, 1);
|
||||
r1 = READ_ONCE(y);
|
||||
S1: synchronize_rcu();
|
||||
WRITE_ONCE(z, 1);
|
||||
}
|
||||
|
||||
P2()
|
||||
{
|
||||
int r2;
|
||||
|
||||
rcu_read_lock();
|
||||
U: r2 = READ_ONCE(z);
|
||||
V: WRITE_ONCE(x, 1);
|
||||
rcu_read_unlock();
|
||||
L2: rcu_read_lock();
|
||||
r2 = READ_ONCE(z);
|
||||
WRITE_ONCE(x, 1);
|
||||
U2: rcu_read_unlock();
|
||||
}
|
||||
|
||||
If r0 = r1 = r2 = 1 at the end, then similar reasoning to before shows
|
||||
that W ->rscs X ->rcu-link Y ->gp Z ->rcu-link U ->rscs V ->rcu-link W.
|
||||
However this cycle is not forbidden, because the sequence of relations
|
||||
contains fewer instances of gp (one) than of rscs (two). Consequently
|
||||
the outcome is allowed by the LKMM. The following instruction timing
|
||||
diagram shows how it might actually occur:
|
||||
that U0 ->rcu-rscsi L0 ->rcu-link S1 ->rcu-gp S1 ->rcu-link U2 ->rcu-rscsi
|
||||
L2 ->rcu-link U0. However this cycle is not forbidden, because the
|
||||
sequence of relations contains fewer instances of rcu-gp (one) than of
|
||||
rcu-rscsi (two). Consequently the outcome is allowed by the LKMM.
|
||||
The following instruction timing diagram shows how it might actually
|
||||
occur:
|
||||
|
||||
P0 P1 P2
|
||||
-------------------- -------------------- --------------------
|
||||
rcu_read_lock()
|
||||
X: WRITE_ONCE(y, 1)
|
||||
Y: r1 = READ_ONCE(y)
|
||||
WRITE_ONCE(y, 1)
|
||||
r1 = READ_ONCE(y)
|
||||
synchronize_rcu() starts
|
||||
. rcu_read_lock()
|
||||
. V: WRITE_ONCE(x, 1)
|
||||
W: r0 = READ_ONCE(x) .
|
||||
. WRITE_ONCE(x, 1)
|
||||
r0 = READ_ONCE(x) .
|
||||
rcu_read_unlock() .
|
||||
synchronize_rcu() ends
|
||||
Z: WRITE_ONCE(z, 1)
|
||||
U: r2 = READ_ONCE(z)
|
||||
WRITE_ONCE(z, 1)
|
||||
r2 = READ_ONCE(z)
|
||||
rcu_read_unlock()
|
||||
|
||||
This requires P0 and P2 to execute their loads and stores out of
|
||||
|
@ -1744,6 +1750,15 @@ section in P0 both starts before P1's grace period does and ends
|
|||
before it does, and the critical section in P2 both starts after P1's
|
||||
grace period does and ends after it does.
|
||||
|
||||
Addendum: The LKMM now supports SRCU (Sleepable Read-Copy-Update) in
|
||||
addition to normal RCU. The ideas involved are much the same as
|
||||
above, with new relations srcu-gp and srcu-rscsi added to represent
|
||||
SRCU grace periods and read-side critical sections. There is a
|
||||
restriction on the srcu-gp and srcu-rscsi links that can appear in an
|
||||
rcu-fence sequence (the srcu-rscsi links must be paired with srcu-gp
|
||||
links having the same SRCU domain with proper nesting); the details
|
||||
are relatively unimportant.
|
||||
|
||||
|
||||
LOCKING
|
||||
-------
|
||||
|
|
|
@ -20,13 +20,17 @@ that litmus test to be exercised within the Linux kernel.
|
|||
REQUIREMENTS
|
||||
============
|
||||
|
||||
Version 7.49 of the "herd7" and "klitmus7" tools must be downloaded
|
||||
separately:
|
||||
Version 7.52 or higher of the "herd7" and "klitmus7" tools must be
|
||||
downloaded separately:
|
||||
|
||||
https://github.com/herd/herdtools7
|
||||
|
||||
See "herdtools7/INSTALL.md" for installation instructions.
|
||||
|
||||
Note that although these tools usually provide backwards compatibility,
|
||||
this is not absolutely guaranteed. Therefore, if a later version does
|
||||
not work, please try using the exact version called out above.
|
||||
|
||||
|
||||
==================
|
||||
BASIC USAGE: HERD7
|
||||
|
@ -221,8 +225,29 @@ The Linux-kernel memory model has the following limitations:
|
|||
additional call_rcu() process to the site of the
|
||||
emulated rcu_barrier().
|
||||
|
||||
e. Sleepable RCU (SRCU) is not modeled. It can be
|
||||
emulated, but perhaps not simply.
|
||||
e. Although sleepable RCU (SRCU) is now modeled, there
|
||||
are some subtle differences between its semantics and
|
||||
those in the Linux kernel. For example, the kernel
|
||||
might interpret the following sequence as two partially
|
||||
overlapping SRCU read-side critical sections:
|
||||
|
||||
1 r1 = srcu_read_lock(&my_srcu);
|
||||
2 do_something_1();
|
||||
3 r2 = srcu_read_lock(&my_srcu);
|
||||
4 do_something_2();
|
||||
5 srcu_read_unlock(&my_srcu, r1);
|
||||
6 do_something_3();
|
||||
7 srcu_read_unlock(&my_srcu, r2);
|
||||
|
||||
In contrast, LKMM will interpret this as a nested pair of
|
||||
SRCU read-side critical sections, with the outer critical
|
||||
section spanning lines 1-7 and the inner critical section
|
||||
spanning lines 3-5.
|
||||
|
||||
This difference would be more of a concern had anyone
|
||||
identified a reasonable use case for partially overlapping
|
||||
SRCU read-side critical sections. For more information,
|
||||
please see: https://paulmck.livejournal.com/40593.html
|
||||
|
||||
f. Reader-writer locking is not modeled. It can be
|
||||
emulated in litmus tests using atomic read-modify-write
|
||||
|
|
|
@ -33,8 +33,14 @@ enum Barriers = 'wmb (*smp_wmb*) ||
|
|||
'after-unlock-lock (*smp_mb__after_unlock_lock*)
|
||||
instructions F[Barriers]
|
||||
|
||||
(* SRCU *)
|
||||
enum SRCU = 'srcu-lock || 'srcu-unlock || 'sync-srcu
|
||||
instructions SRCU[SRCU]
|
||||
(* All srcu events *)
|
||||
let Srcu = Srcu-lock | Srcu-unlock | Sync-srcu
|
||||
|
||||
(* Compute matching pairs of nested Rcu-lock and Rcu-unlock *)
|
||||
let matched = let rec
|
||||
let rcu-rscs = let rec
|
||||
unmatched-locks = Rcu-lock \ domain(matched)
|
||||
and unmatched-unlocks = Rcu-unlock \ range(matched)
|
||||
and unmatched = unmatched-locks | unmatched-unlocks
|
||||
|
@ -46,8 +52,27 @@ let matched = let rec
|
|||
in matched
|
||||
|
||||
(* Validate nesting *)
|
||||
flag ~empty Rcu-lock \ domain(matched) as unbalanced-rcu-locking
|
||||
flag ~empty Rcu-unlock \ range(matched) as unbalanced-rcu-locking
|
||||
flag ~empty Rcu-lock \ domain(rcu-rscs) as unbalanced-rcu-locking
|
||||
flag ~empty Rcu-unlock \ range(rcu-rscs) as unbalanced-rcu-locking
|
||||
|
||||
(* Outermost level of nesting only *)
|
||||
let crit = matched \ (po^-1 ; matched ; po^-1)
|
||||
(* Compute matching pairs of nested Srcu-lock and Srcu-unlock *)
|
||||
let srcu-rscs = let rec
|
||||
unmatched-locks = Srcu-lock \ domain(matched)
|
||||
and unmatched-unlocks = Srcu-unlock \ range(matched)
|
||||
and unmatched = unmatched-locks | unmatched-unlocks
|
||||
and unmatched-po = ([unmatched] ; po ; [unmatched]) & loc
|
||||
and unmatched-locks-to-unlocks =
|
||||
([unmatched-locks] ; po ; [unmatched-unlocks]) & loc
|
||||
and matched = matched | (unmatched-locks-to-unlocks \
|
||||
(unmatched-po ; unmatched-po))
|
||||
in matched
|
||||
|
||||
(* Validate nesting *)
|
||||
flag ~empty Srcu-lock \ domain(srcu-rscs) as unbalanced-srcu-locking
|
||||
flag ~empty Srcu-unlock \ range(srcu-rscs) as unbalanced-srcu-locking
|
||||
|
||||
(* Check for use of synchronize_srcu() inside an RCU critical section *)
|
||||
flag ~empty rcu-rscs & (po ; [Sync-srcu] ; po) as invalid-sleep
|
||||
|
||||
(* Validate SRCU dynamic match *)
|
||||
flag ~empty different-values(srcu-rscs) as srcu-bad-nesting
|
||||
|
|
|
@ -33,7 +33,7 @@ let mb = ([M] ; fencerel(Mb) ; [M]) |
|
|||
([M] ; po? ; [LKW] ; fencerel(After-spinlock) ; [M]) |
|
||||
([M] ; po ; [UL] ; (co | po) ; [LKW] ;
|
||||
fencerel(After-unlock-lock) ; [M])
|
||||
let gp = po ; [Sync-rcu] ; po?
|
||||
let gp = po ; [Sync-rcu | Sync-srcu] ; po?
|
||||
|
||||
let strong-fence = mb | gp
|
||||
|
||||
|
@ -91,32 +91,47 @@ acyclic pb as propagation
|
|||
(*******)
|
||||
|
||||
(*
|
||||
* Effect of read-side critical section proceeds from the rcu_read_lock()
|
||||
* onward on the one hand and from the rcu_read_unlock() backwards on the
|
||||
* other hand.
|
||||
* Effects of read-side critical sections proceed from the rcu_read_unlock()
|
||||
* or srcu_read_unlock() backwards on the one hand, and from the
|
||||
* rcu_read_lock() or srcu_read_lock() forwards on the other hand.
|
||||
*
|
||||
* In the definition of rcu-fence below, the po term at the left-hand side
|
||||
* of each disjunct and the po? term at the right-hand end have been factored
|
||||
* out. They have been moved into the definitions of rcu-link and rb.
|
||||
* This was necessary in order to apply the "& loc" tests correctly.
|
||||
*)
|
||||
let rscs = po ; crit^-1 ; po?
|
||||
let rcu-gp = [Sync-rcu] (* Compare with gp *)
|
||||
let srcu-gp = [Sync-srcu]
|
||||
let rcu-rscsi = rcu-rscs^-1
|
||||
let srcu-rscsi = srcu-rscs^-1
|
||||
|
||||
(*
|
||||
* The synchronize_rcu() strong fence is special in that it can order not
|
||||
* one but two non-rf relations, but only in conjunction with an RCU
|
||||
* read-side critical section.
|
||||
*)
|
||||
let rcu-link = hb* ; pb* ; prop
|
||||
let rcu-link = po? ; hb* ; pb* ; prop ; po
|
||||
|
||||
(*
|
||||
* Any sequence containing at least as many grace periods as RCU read-side
|
||||
* critical sections (joined by rcu-link) acts as a generalized strong fence.
|
||||
* Likewise for SRCU grace periods and read-side critical sections, provided
|
||||
* the synchronize_srcu() and srcu_read_[un]lock() calls refer to the same
|
||||
* struct srcu_struct location.
|
||||
*)
|
||||
let rec rcu-fence = gp |
|
||||
(gp ; rcu-link ; rscs) |
|
||||
(rscs ; rcu-link ; gp) |
|
||||
(gp ; rcu-link ; rcu-fence ; rcu-link ; rscs) |
|
||||
(rscs ; rcu-link ; rcu-fence ; rcu-link ; gp) |
|
||||
let rec rcu-fence = rcu-gp | srcu-gp |
|
||||
(rcu-gp ; rcu-link ; rcu-rscsi) |
|
||||
((srcu-gp ; rcu-link ; srcu-rscsi) & loc) |
|
||||
(rcu-rscsi ; rcu-link ; rcu-gp) |
|
||||
((srcu-rscsi ; rcu-link ; srcu-gp) & loc) |
|
||||
(rcu-gp ; rcu-link ; rcu-fence ; rcu-link ; rcu-rscsi) |
|
||||
((srcu-gp ; rcu-link ; rcu-fence ; rcu-link ; srcu-rscsi) & loc) |
|
||||
(rcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; rcu-gp) |
|
||||
((srcu-rscsi ; rcu-link ; rcu-fence ; rcu-link ; srcu-gp) & loc) |
|
||||
(rcu-fence ; rcu-link ; rcu-fence)
|
||||
|
||||
(* rb orders instructions just as pb does *)
|
||||
let rb = prop ; rcu-fence ; hb* ; pb*
|
||||
let rb = prop ; po ; rcu-fence ; po? ; hb* ; pb*
|
||||
|
||||
irreflexive rb as rcu
|
||||
|
||||
|
|
|
@ -47,6 +47,11 @@ rcu_read_unlock() { __fence{rcu-unlock}; }
|
|||
synchronize_rcu() { __fence{sync-rcu}; }
|
||||
synchronize_rcu_expedited() { __fence{sync-rcu}; }
|
||||
|
||||
// SRCU
|
||||
srcu_read_lock(X) __srcu{srcu-lock}(X)
|
||||
srcu_read_unlock(X,Y) { __srcu{srcu-unlock}(X,Y); }
|
||||
synchronize_srcu(X) { __srcu{sync-srcu}(X); }
|
||||
|
||||
// Atomic
|
||||
atomic_read(X) READ_ONCE(*X)
|
||||
atomic_set(X,V) { WRITE_ONCE(*X,V); }
|
||||
|
|
|
@ -6,9 +6,6 @@
|
|||
|
||||
(*
|
||||
* Generate coherence orders and handle lock operations
|
||||
*
|
||||
* Warning: spin_is_locked() crashes herd7 versions strictly before 7.48.
|
||||
* spin_is_locked() is functional from herd7 version 7.49.
|
||||
*)
|
||||
|
||||
include "cross.cat"
|
||||
|
|
Loading…
Reference in New Issue