Don't send unnecessary PING to replicas (#13790)

The reason why master sends PING is to keep the connection with replica
active, so master need not send PING to replicas if already sent
replication stream in the past `repl_ping_slave_period` time.

Now master only sends PINGs and increases `master_repl_offset` if there
is no traffic, so this PR also can reduce the impact of issue in
https://github.com/redis/redis/pull/13773, of course, does not resolve
it completely.
> Fix ping causing inconsistency between AOF size and replication offset
in the future PR. Because we increment the replication offset when
sending PING/REPLCONF to the replica but do not write data to the AOF
file, this might cause the starting offset of the AOF file plus its size
to be inconsistent with the actual replication offset.
This commit is contained in:
Yuan Wang 2025-02-13 10:52:19 +08:00 committed by GitHub
parent 87124a38b6
commit 662cb2fe75
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 11 additions and 6 deletions

View File

@ -519,6 +519,9 @@ void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
/* We can't have slaves attached and no backlog. */ /* We can't have slaves attached and no backlog. */
serverAssert(!(listLength(slaves) != 0 && server.repl_backlog == NULL)); serverAssert(!(listLength(slaves) != 0 && server.repl_backlog == NULL));
/* Update the time of sending replication stream to replicas. */
server.repl_stream_lastio = server.unixtime;
/* Must install write handler for all replicas first before feeding /* Must install write handler for all replicas first before feeding
* replication stream. */ * replication stream. */
prepareReplicasToWrite(); prepareReplicasToWrite();
@ -4479,8 +4482,6 @@ long long replicationGetSlaveOffset(void) {
/* Replication cron function, called 1 time per second. */ /* Replication cron function, called 1 time per second. */
void replicationCron(void) { void replicationCron(void) {
static long long replication_cron_loops = 0;
/* Check failover status first, to see if we need to start /* Check failover status first, to see if we need to start
* handling the failover. */ * handling the failover. */
updateFailoverStatus(); updateFailoverStatus();
@ -4533,9 +4534,12 @@ void replicationCron(void) {
listNode *ln; listNode *ln;
robj *ping_argv[1]; robj *ping_argv[1];
/* First, send PING according to ping_slave_period. */ /* First, send PING according to ping_slave_period. The reason why master
if ((replication_cron_loops % server.repl_ping_slave_period) == 0 && * sends PING is to keep the connection with replica active, so master need
listLength(server.slaves)) * not send PING to replicas if already sent replication stream in the past
* repl_ping_slave_period time. */
if (server.masterhost == NULL && listLength(server.slaves) &&
server.unixtime >= server.repl_stream_lastio + server.repl_ping_slave_period)
{ {
/* Note that we don't send the PING if the clients are paused during /* Note that we don't send the PING if the clients are paused during
* a Redis Cluster manual failover: the PING we send will otherwise * a Redis Cluster manual failover: the PING we send will otherwise
@ -4672,7 +4676,6 @@ void replicationCron(void) {
/* Refresh the number of slaves with lag <= min-slaves-max-lag. */ /* Refresh the number of slaves with lag <= min-slaves-max-lag. */
refreshGoodSlavesCount(); refreshGoodSlavesCount();
replication_cron_loops++; /* Incremented with frequency 1 HZ. */
} }
int shouldStartChildReplication(int *mincapa_out, int *req_out) { int shouldStartChildReplication(int *mincapa_out, int *req_out) {

View File

@ -2192,6 +2192,7 @@ void initServerConfig(void) {
server.repl_down_since = 0; /* Never connected, repl is down since EVER. */ server.repl_down_since = 0; /* Never connected, repl is down since EVER. */
server.master_repl_offset = 0; server.master_repl_offset = 0;
server.fsynced_reploff_pending = 0; server.fsynced_reploff_pending = 0;
server.repl_stream_lastio = server.unixtime;
/* Replication partial resync backlog */ /* Replication partial resync backlog */
server.repl_backlog = NULL; server.repl_backlog = NULL;

View File

@ -2012,6 +2012,7 @@ struct redisServer {
size_t repl_buffer_mem; /* The memory of replication buffer. */ size_t repl_buffer_mem; /* The memory of replication buffer. */
list *repl_buffer_blocks; /* Replication buffers blocks list list *repl_buffer_blocks; /* Replication buffers blocks list
* (serving replica clients and repl backlog) */ * (serving replica clients and repl backlog) */
time_t repl_stream_lastio; /* Unix time of the latest sending replication stream. */
/* Replication (slave) */ /* Replication (slave) */
char *masteruser; /* AUTH with this user and masterauth with master */ char *masteruser; /* AUTH with this user and masterauth with master */
sds masterauth; /* AUTH with this password with master */ sds masterauth; /* AUTH with this password with master */