mirror of https://mirror.osredm.com/root/redis.git
Fix flaky cluster tests in 24-links.tcl (#10157)
* Fix flaky cluster test "Disconnect link when send buffer limit reached"
* Fix flaky cluster test "Each node has two links with each peer"

Co-authored-by: Madelyn Olson <madelyneolson@gmail.com>
This commit is contained in:
parent
7e5ded2ad0
commit
b40a9ba5fd
|
@ -8,18 +8,27 @@ test "Cluster should start ok" {
|
||||||
assert_cluster_state ok
|
assert_cluster_state ok
|
||||||
}
|
}
|
||||||
|
|
||||||
|
proc number_of_peers {id} {
|
||||||
|
expr [llength [get_cluster_nodes $id]] - 1
|
||||||
|
}
|
||||||
|
|
||||||
|
proc number_of_links {id} {
|
||||||
|
llength [get_cluster_links $id]
|
||||||
|
}
|
||||||
|
|
||||||
test "Each node has two links with each peer" {
|
test "Each node has two links with each peer" {
|
||||||
foreach_redis_id id {
|
foreach_redis_id id {
|
||||||
# Get number of peers, excluding myself
|
# Assert that from the point of view of each node, there are two links for
|
||||||
|
# each peer. It might take a while for the cluster to stabilize so wait up
|
||||||
|
# to 5 seconds.
|
||||||
|
wait_for_condition 50 100 {
|
||||||
|
[number_of_peers $id]*2 == [number_of_links $id]
|
||||||
|
} else {
|
||||||
|
assert_equal [expr [number_of_peers $id]*2] [number_of_links $id]
|
||||||
|
}
|
||||||
|
|
||||||
set nodes [get_cluster_nodes $id]
|
set nodes [get_cluster_nodes $id]
|
||||||
set num_peers [expr [llength $nodes] - 1]
|
|
||||||
|
|
||||||
# Get number of links to peers
|
|
||||||
set links [get_cluster_links $id]
|
set links [get_cluster_links $id]
|
||||||
set num_links [llength $links]
|
|
||||||
|
|
||||||
# Two links per peer
|
|
||||||
assert {$num_peers*2 eq $num_links}
|
|
||||||
|
|
||||||
# For each peer there should be exactly one
|
# For each peer there should be exactly one
|
||||||
# link "to" it and one link "from" it.
|
# link "to" it and one link "from" it.
|
||||||
|
@ -59,9 +68,12 @@ test "Disconnect link when send buffer limit reached" {
|
||||||
set orig_link_p1_to_p2 [get_link_to_peer $primary1_id $primary2_name]
|
set orig_link_p1_to_p2 [get_link_to_peer $primary1_id $primary2_name]
|
||||||
set orig_link_p1_from_p2 [get_link_from_peer $primary1_id $primary2_name]
|
set orig_link_p1_from_p2 [get_link_from_peer $primary1_id $primary2_name]
|
||||||
|
|
||||||
# On primary1, set cluster link send buffer limit to 32MB
|
# On primary1, set cluster link send buffer limit to 256KB, which is large enough to not be
|
||||||
|
# overflowed by regular gossip messages but also small enough that it doesn't take too much
|
||||||
|
# memory to overflow it. If it is set too high, Redis may get OOM killed by kernel before this
|
||||||
|
# limit is overflowed in some RAM-limited test environments.
|
||||||
set oldlimit [lindex [$primary1 CONFIG get cluster-link-sendbuf-limit] 1]
|
set oldlimit [lindex [$primary1 CONFIG get cluster-link-sendbuf-limit] 1]
|
||||||
$primary1 CONFIG set cluster-link-sendbuf-limit [expr 32*1024*1024]
|
$primary1 CONFIG set cluster-link-sendbuf-limit [expr 256*1024]
|
||||||
assert {[get_info_field [$primary1 cluster info] total_cluster_links_buffer_limit_exceeded] eq 0}
|
assert {[get_info_field [$primary1 cluster info] total_cluster_links_buffer_limit_exceeded] eq 0}
|
||||||
|
|
||||||
# To manufacture an ever-growing send buffer from primary1 to primary2,
|
# To manufacture an ever-growing send buffer from primary1 to primary2,
|
||||||
|
@ -69,22 +81,25 @@ test "Disconnect link when send buffer limit reached" {
|
||||||
set primary2_pid [get_instance_attrib redis $primary2_id pid]
|
set primary2_pid [get_instance_attrib redis $primary2_id pid]
|
||||||
exec kill -SIGSTOP $primary2_pid
|
exec kill -SIGSTOP $primary2_pid
|
||||||
|
|
||||||
# On primary1, send a 10MB Pubsub message. It will stay in send buffer of
|
# On primary1, send 128KB Pubsub messages in a loop until the send buffer of the link from
|
||||||
# the link from primary1 to primary2
|
# primary1 to primary2 exceeds the buffer limit and the link is therefore dropped.
|
||||||
$primary1 publish channel [prepare_value [expr 10*1024*1024]]
|
# For the send buffer to grow, we need to first exhaust TCP send buffer of primary1 and TCP
|
||||||
|
# receive buffer of primary2. The sizes of these two buffers vary by OS, but 100 128KB
|
||||||
|
# messages should be sufficient.
|
||||||
|
set i 0
|
||||||
|
wait_for_condition 100 0 {
|
||||||
|
[catch {incr i} e] == 0 &&
|
||||||
|
[catch {$primary1 publish channel [prepare_value [expr 128*1024]]} e] == 0 &&
|
||||||
|
[catch {after 500} e] == 0 &&
|
||||||
|
[get_info_field [$primary1 cluster info] total_cluster_links_buffer_limit_exceeded] eq 1
|
||||||
|
} else {
|
||||||
|
fail "Cluster link not freed as expected"
|
||||||
|
}
|
||||||
|
puts -nonewline "$i 128KB messages needed to overflow 256KB buffer limit. "
|
||||||
|
|
||||||
# Check the same link has not been disconnected, but its send buffer has grown
|
# A new link to primary2 should have been recreated
|
||||||
set same_link_p1_to_p2 [get_link_to_peer $primary1_id $primary2_name]
|
|
||||||
assert {[dict get $same_link_p1_to_p2 create_time] eq [dict get $orig_link_p1_to_p2 create_time]}
|
|
||||||
assert {[dict get $same_link_p1_to_p2 send_buffer_allocated] > [dict get $orig_link_p1_to_p2 send_buffer_allocated]}
|
|
||||||
|
|
||||||
# On primary1, send another 30MB Pubsub message.
|
|
||||||
$primary1 publish channel [prepare_value [expr 30*1024*1024]]
|
|
||||||
|
|
||||||
# Link has exceeded buffer limit and been dropped and recreated
|
|
||||||
set new_link_p1_to_p2 [get_link_to_peer $primary1_id $primary2_name]
|
set new_link_p1_to_p2 [get_link_to_peer $primary1_id $primary2_name]
|
||||||
assert {[dict get $new_link_p1_to_p2 create_time] > [dict get $orig_link_p1_to_p2 create_time]}
|
assert {[dict get $new_link_p1_to_p2 create_time] > [dict get $orig_link_p1_to_p2 create_time]}
|
||||||
assert {[get_info_field [$primary1 cluster info] total_cluster_links_buffer_limit_exceeded] eq 1}
|
|
||||||
|
|
||||||
# Link from primary2 should not be affected
|
# Link from primary2 should not be affected
|
||||||
set same_link_p1_from_p2 [get_link_from_peer $primary1_id $primary2_name]
|
set same_link_p1_from_p2 [get_link_from_peer $primary1_id $primary2_name]
|
||||||
|
|
Loading…
Reference in New Issue