mirror of https://mirror.osredm.com/root/redis.git
Stabilize cluster hostnames tests (#11307)
This PR introduces a couple of changes to improve cluster test stability: 1. Increase the cluster node timeout to 3 seconds, which is similar to the normal cluster tests, but introduce a new mechanism to increase the ping frequency (i.e. shorten the ping interval, which otherwise defaults to half the node timeout) so that the tests are still fast. This new config, `cluster-ping-interval`, is a debug config. 2. Set `cluster-replica-no-failover yes` on a wider array of tests which are sensitive to failovers. Such unwanted failovers were occurring on the ARM CI.
This commit is contained in:
parent
a549b78c48
commit
663fbd3459
|
@ -4198,9 +4198,11 @@ void clusterCron(void) {
|
|||
* received PONG is older than half the cluster timeout, send
|
||||
* a new ping now, to ensure all the nodes are pinged without
|
||||
* a too big delay. */
|
||||
mstime_t ping_interval = server.cluster_ping_interval ?
|
||||
server.cluster_ping_interval : server.cluster_node_timeout/2;
|
||||
if (node->link &&
|
||||
node->ping_sent == 0 &&
|
||||
(now - node->pong_received) > server.cluster_node_timeout/2)
|
||||
(now - node->pong_received) > ping_interval)
|
||||
{
|
||||
clusterSendPing(node->link, CLUSTERMSG_TYPE_PING);
|
||||
continue;
|
||||
|
|
|
@ -3116,6 +3116,7 @@ standardConfig static_configs[] = {
|
|||
/* Long Long configs */
|
||||
createLongLongConfig("busy-reply-threshold", "lua-time-limit", MODIFIABLE_CONFIG, 0, LONG_MAX, server.busy_reply_threshold, 5000, INTEGER_CONFIG, NULL, NULL),/* milliseconds */
|
||||
createLongLongConfig("cluster-node-timeout", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.cluster_node_timeout, 15000, INTEGER_CONFIG, NULL, NULL),
|
||||
createLongLongConfig("cluster-ping-interval", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 0, LLONG_MAX, server.cluster_ping_interval, 0, INTEGER_CONFIG, NULL, NULL),
|
||||
createLongLongConfig("slowlog-log-slower-than", NULL, MODIFIABLE_CONFIG, -1, LLONG_MAX, server.slowlog_log_slower_than, 10000, INTEGER_CONFIG, NULL, NULL),
|
||||
createLongLongConfig("latency-monitor-threshold", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.latency_monitor_threshold, 0, INTEGER_CONFIG, NULL, NULL),
|
||||
createLongLongConfig("proto-max-bulk-len", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, 1024*1024, LONG_MAX, server.proto_max_bulk_len, 512ll*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Bulk request max size */
|
||||
|
|
|
@ -1852,6 +1852,7 @@ struct redisServer {
|
|||
int cluster_enabled; /* Is cluster enabled? */
|
||||
int cluster_port; /* Set the cluster port for a node. */
|
||||
mstime_t cluster_node_timeout; /* Cluster node timeout. */
|
||||
mstime_t cluster_ping_interval; /* A debug configuration for setting how often cluster nodes send ping messages. */
|
||||
char *cluster_configfile; /* Cluster auto-generated config file name. */
|
||||
struct clusterState *cluster; /* State of the cluster */
|
||||
int cluster_migration_barrier; /* Cluster replicas migration barrier. */
|
||||
|
|
|
@ -103,7 +103,7 @@ proc start_cluster {masters replicas options code {slot_allocator continuous_slo
|
|||
|
||||
# Configure the starting of multiple servers. Set cluster node timeout
|
||||
# aggressively since many tests depend on ping/pong messages.
|
||||
set cluster_options [list overrides [list cluster-enabled yes cluster-node-timeout 500]]
|
||||
set cluster_options [list overrides [list cluster-enabled yes cluster-ping-interval 100 cluster-node-timeout 3000]]
|
||||
set options [concat $cluster_options $options]
|
||||
|
||||
# Cluster mode only supports a single database, so before executing the tests
|
||||
|
|
|
@ -418,7 +418,7 @@ proc start_server {options {code undefined}} {
|
|||
set baseconfig $value
|
||||
}
|
||||
"overrides" {
|
||||
set overrides $value
|
||||
set overrides [concat $overrides $value]
|
||||
}
|
||||
"config_lines" {
|
||||
set config_lines $value
|
||||
|
|
|
@ -42,8 +42,9 @@ proc get_slot_field {slot_output shard_id node_id attrib_id} {
|
|||
return [lindex [lindex [lindex $slot_output $shard_id] $node_id] $attrib_id]
|
||||
}
|
||||
|
||||
# Start a cluster with 3 masters and 4 replicas.
|
||||
start_cluster 3 4 {tags {external:skip cluster}} {
|
||||
# Start a cluster with 3 masters and 4 replicas.
|
||||
# These tests rely on specific node ordering, so make sure no node fails over.
|
||||
start_cluster 3 4 {tags {external:skip cluster} overrides {cluster-replica-no-failover yes}} {
|
||||
test "Set cluster hostnames and verify they are propagated" {
|
||||
for {set j 0} {$j < [llength $::servers]} {incr j} {
|
||||
R $j config set cluster-announce-hostname "host-$j.com"
|
||||
|
@ -202,7 +203,9 @@ test "Verify the nodes configured with prefer hostname only show hostname for ne
|
|||
R 0 DEBUG DROP-CLUSTER-PACKET-FILTER -1
|
||||
R 6 DEBUG DROP-CLUSTER-PACKET-FILTER -1
|
||||
|
||||
wait_for_condition 50 100 {
|
||||
# This operation sometimes spikes to around 5 seconds to resolve the state,
|
||||
# so it has a higher timeout.
|
||||
wait_for_condition 50 500 {
|
||||
[llength [R 6 CLUSTER SLOTS]] eq 3
|
||||
} else {
|
||||
fail "Node did not learn about the 2 shards it can talk to"
|
||||
|
@ -220,10 +223,6 @@ test "Test restart will keep hostname information" {
|
|||
# Store the hostname in the config
|
||||
R 0 config rewrite
|
||||
|
||||
# If the primary is slow to reboot it might get demoted, so prevent the replica
|
||||
# from nominating itself.
|
||||
R 3 config set cluster-replica-no-failover yes
|
||||
|
||||
restart_server 0 true false
|
||||
set slot_result [R 0 CLUSTER SLOTS]
|
||||
assert_equal [lindex [get_slot_field $slot_result 0 2 3] 1] "restart-1.com"
|
||||
|
|
Loading…
Reference in New Issue