Put replica online when bgsave is done (#13895)

Before https://github.com/redis/redis/pull/13732, replicas were brought
online immediately after master wrote the last bytes of the RDB file to
the socket. This behavior remains unchanged if rdbchannel replication is
not used. However, with rdbchannel replication, the replica is brought
online only after the master receives the first ack, which the replica
sends after the RDB is loaded.

To align the two behaviors, this commit reverts that change and puts the
replica online once bgsave is done.

Additional changes:
- INFO field `mem_total_replication_buffers` now also includes
`server.repl_full_sync_buffer.mem_used`, which is the replication stream
accumulated on the replica side during rdbchannel replication.
- Removed debug-level logging from some replication tests. These tests
generate thousands of keys, and debug logging may produce per-key log
lines in some cases.
This commit is contained in:
Ozan Tezcan 2025-03-31 13:48:49 +03:00 committed by GitHub
parent aa8e2d1712
commit 366c6aff81
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 13 additions and 9 deletions

View File

@ -1336,6 +1336,10 @@ void replconfCommand(client *c) {
checkChildrenDone();
if (c->repl_start_cmd_stream_on_ack && c->replstate == SLAVE_STATE_ONLINE)
replicaStartCommandStream(c);
/* If state is send_bulk_and_stream, it means this is the main
* channel of the slave in rdbchannel replication. Normally, slave
* will be put online after rdb fork is completed. There is chance
* that 'ack' might be received before we detect bgsave is done. */
if (c->replstate == SLAVE_STATE_SEND_BULK_AND_STREAM)
replicaPutOnline(c);
/* Note: this command does not reply anything! */
@ -1754,7 +1758,14 @@ void updateSlavesWaitingBgsave(int bgsaveerr, int type) {
/* We can get here via freeClient()->killRDBChild()->checkChildrenDone(). skip disconnected slaves. */
if (!slave->conn) continue;
if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) {
if (slave->replstate == SLAVE_STATE_SEND_BULK_AND_STREAM) {
/* This is the main channel of the slave that received the RDB.
* Put it online if RDB delivery is successful. */
if (bgsaveerr == C_OK)
replicaPutOnline(slave);
else
freeClientAsync(slave);
} else if (slave->replstate == SLAVE_STATE_WAIT_BGSAVE_END) {
struct redis_stat buf;
if (bgsaveerr != C_OK) {

View File

@ -5920,7 +5920,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
"mem_fragmentation_bytes:%zd\r\n", mh->total_frag_bytes,
"mem_not_counted_for_evict:%zu\r\n", freeMemoryGetNotCountedMemory(),
"mem_replication_backlog:%zu\r\n", mh->repl_backlog,
"mem_total_replication_buffers:%zu\r\n", server.repl_buffer_mem,
"mem_total_replication_buffers:%zu\r\n", server.repl_buffer_mem + server.repl_full_sync_buffer.mem_used,
"mem_replica_full_sync_buffer:%zu\r\n", server.repl_full_sync_buffer.mem_used,
"mem_clients_slaves:%zu\r\n", mh->clients_slaves,
"mem_clients_normal:%zu\r\n", mh->clients_normal,

View File

@ -381,7 +381,6 @@ start_server {tags {"repl external:skip"}} {
$master config set repl-rdb-channel yes
$master config set repl-backlog-size 1mb
$master config set client-output-buffer-limit "replica 100k 0 0"
$master config set loglevel debug
$master config set repl-diskless-sync-delay 3
start_server {} {
@ -389,7 +388,6 @@ start_server {tags {"repl external:skip"}} {
set replica_pid [srv 0 pid]
$replica config set repl-rdb-channel yes
$replica config set loglevel debug
$replica config set repl-timeout 10
$replica config set key-load-delay 10000
$replica config set loading-process-events-interval-bytes 1024
@ -441,19 +439,16 @@ start_server {tags {"repl external:skip"}} {
$master config set rdb-key-save-delay 300
$master config set client-output-buffer-limit "replica 0 0 0"
$master config set repl-diskless-sync-delay 5
$master config set loglevel debug
populate 10000 master 1
start_server {} {
set replica1 [srv 0 client]
$replica1 config set repl-rdb-channel yes
$replica1 config set loglevel debug
start_server {} {
set replica2 [srv 0 client]
$replica2 config set repl-rdb-channel yes
$replica2 config set loglevel debug
set load_handle [start_write_load $master_host $master_port 100 "key"]
@ -539,7 +534,6 @@ start_server {tags {"repl external:skip"}} {
$master config set repl-diskless-sync yes
$master config set repl-rdb-channel yes
$master config set loglevel debug
$master config set rdb-key-save-delay 1000
populate 3000 prefix1 1
@ -550,7 +544,6 @@ start_server {tags {"repl external:skip"}} {
set replica_pid [srv 0 pid]
$replica config set repl-rdb-channel yes
$replica config set loglevel debug
$replica config set repl-timeout 10
set load_handle [start_write_load $master_host $master_port 100 "key"]