mirror of https://mirror.osredm.com/root/redis.git
Fix fork done handler wrongly update fsync metrics and enhance AOF_ FSYNC_ALWAYS (#11973)
This PR fix several unrelated bugs that were discovered by the same set of tests (WAITAOF tests in #11713), could make the `WAITAOF` test hang. The change in `backgroundRewriteDoneHandler` is about MP-AOF. That leftover / old code assumes that we started a new AOF file just now (when we have a new base into which we're gonna incrementally write), but the fact is that with MP-AOF, the fork done handler doesn't really affect the incremental file being maintained by the parent process, there's no reason to re-issue `SELECT`, and no reason to update any of the fsync variables in that flow. This should have been deleted with MP-AOF (introduced in #9788, 7.0). The damage is that the update to `aof_fsync_offset` will cause us to miss an fsync in `flushAppendOnlyFile`, that happens if we stop write commands in `AOF_FSYNC_EVERYSEC` while an AOFRW is in progress. This caused a new `WAITAOF` test to sometime hang forever. Also because of MP-AOF, we needed to change `aof_fsync_offset` to `aof_last_incr_fsync_offset` and match it to `aof_last_incr_size` in `flushAppendOnlyFile`. This is because in the past we compared `aof_fsync_offset` and `aof_current_size`, but with MP-AOF it could be the total AOF file will be smaller after AOFRW, and the (already existing) incr file still has data that needs to be fsynced. The change in `flushAppendOnlyFile`, about the `AOF_FSYNC_ALWAYS`, it is follow #6053 (the details is in #5985), we also check `AOF_FSYNC_ALWAYS` to handle a case where appendfsync is changed from everysec to always while there is data that's written but not yet fsynced.
This commit is contained in:
parent
557ca05d05
commit
cb17178658
31
src/aof.c
31
src/aof.c
|
@ -758,6 +758,7 @@ void aofOpenIfNeededOnServerStart(void) {
|
|||
}
|
||||
|
||||
server.aof_last_incr_size = getAppendOnlyFileSize(aof_name, NULL);
|
||||
server.aof_last_incr_fsync_offset = server.aof_last_incr_size;
|
||||
|
||||
if (incr_aof_len) {
|
||||
serverLog(LL_NOTICE, "Opening AOF incr file %s on server start", aof_name);
|
||||
|
@ -832,13 +833,14 @@ int openNewIncrAofForAppend(void) {
|
|||
* is already synced at this point so fsync doesn't matter. */
|
||||
if (server.aof_fd != -1) {
|
||||
aof_background_fsync_and_close(server.aof_fd);
|
||||
server.aof_fsync_offset = server.aof_current_size;
|
||||
server.aof_last_fsync = server.unixtime;
|
||||
}
|
||||
server.aof_fd = newfd;
|
||||
|
||||
/* Reset the aof_last_incr_size. */
|
||||
server.aof_last_incr_size = 0;
|
||||
/* Reset the aof_last_incr_fsync_offset. */
|
||||
server.aof_last_incr_fsync_offset = 0;
|
||||
/* Update `server.aof_manifest`. */
|
||||
if (temp_am) aofManifestFreeAndUpdate(temp_am);
|
||||
return C_OK;
|
||||
|
@ -952,7 +954,6 @@ void stopAppendOnly(void) {
|
|||
if (redis_fsync(server.aof_fd) == -1) {
|
||||
serverLog(LL_WARNING,"Fail to fsync the AOF file: %s",strerror(errno));
|
||||
} else {
|
||||
server.aof_fsync_offset = server.aof_current_size;
|
||||
server.aof_last_fsync = server.unixtime;
|
||||
}
|
||||
close(server.aof_fd);
|
||||
|
@ -962,6 +963,7 @@ void stopAppendOnly(void) {
|
|||
server.aof_state = AOF_OFF;
|
||||
server.aof_rewrite_scheduled = 0;
|
||||
server.aof_last_incr_size = 0;
|
||||
server.aof_last_incr_fsync_offset = 0;
|
||||
server.fsynced_reploff = -1;
|
||||
atomicSet(server.fsynced_reploff_pending, 0);
|
||||
killAppendOnlyChild();
|
||||
|
@ -1083,10 +1085,19 @@ void flushAppendOnlyFile(int force) {
|
|||
* stop write commands before fsync called in one second,
|
||||
* the data in page cache cannot be flushed in time. */
|
||||
if (server.aof_fsync == AOF_FSYNC_EVERYSEC &&
|
||||
server.aof_fsync_offset != server.aof_current_size &&
|
||||
server.aof_last_incr_fsync_offset != server.aof_last_incr_size &&
|
||||
server.unixtime > server.aof_last_fsync &&
|
||||
!(sync_in_progress = aofFsyncInProgress())) {
|
||||
goto try_fsync;
|
||||
|
||||
/* Check if we need to do fsync even the aof buffer is empty,
|
||||
* the reason is described in the previous AOF_FSYNC_EVERYSEC block,
|
||||
* and AOF_FSYNC_ALWAYS is also checked here to handle a case where
|
||||
* aof_fsync is changed from everysec to always. */
|
||||
} else if (server.aof_fsync == AOF_FSYNC_ALWAYS &&
|
||||
server.aof_last_incr_fsync_offset != server.aof_last_incr_size)
|
||||
{
|
||||
goto try_fsync;
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
|
@ -1253,14 +1264,14 @@ try_fsync:
|
|||
}
|
||||
latencyEndMonitor(latency);
|
||||
latencyAddSampleIfNeeded("aof-fsync-always",latency);
|
||||
server.aof_fsync_offset = server.aof_current_size;
|
||||
server.aof_last_incr_fsync_offset = server.aof_last_incr_size;
|
||||
server.aof_last_fsync = server.unixtime;
|
||||
atomicSet(server.fsynced_reploff_pending, server.master_repl_offset);
|
||||
} else if ((server.aof_fsync == AOF_FSYNC_EVERYSEC &&
|
||||
server.unixtime > server.aof_last_fsync)) {
|
||||
} else if (server.aof_fsync == AOF_FSYNC_EVERYSEC &&
|
||||
server.unixtime > server.aof_last_fsync) {
|
||||
if (!sync_in_progress) {
|
||||
aof_background_fsync(server.aof_fd);
|
||||
server.aof_fsync_offset = server.aof_current_size;
|
||||
server.aof_last_incr_fsync_offset = server.aof_last_incr_size;
|
||||
}
|
||||
server.aof_last_fsync = server.unixtime;
|
||||
}
|
||||
|
@ -1766,7 +1777,6 @@ int loadAppendOnlyFiles(aofManifest *am) {
|
|||
* executed early, but that shouldn't be a problem since everything will be
|
||||
* fine after the first AOFRW. */
|
||||
server.aof_rewrite_base_size = base_size;
|
||||
server.aof_fsync_offset = server.aof_current_size;
|
||||
|
||||
cleanup:
|
||||
stopLoading(ret == AOF_OK || ret == AOF_TRUNCATED);
|
||||
|
@ -2666,13 +2676,10 @@ void backgroundRewriteDoneHandler(int exitcode, int bysignal) {
|
|||
/* We can safely let `server.aof_manifest` point to 'temp_am' and free the previous one. */
|
||||
aofManifestFreeAndUpdate(temp_am);
|
||||
|
||||
if (server.aof_fd != -1) {
|
||||
if (server.aof_state != AOF_OFF) {
|
||||
/* AOF enabled. */
|
||||
server.aof_selected_db = -1; /* Make sure SELECT is re-issued */
|
||||
server.aof_current_size = getAppendOnlyFileSize(new_base_filename, NULL) + server.aof_last_incr_size;
|
||||
server.aof_rewrite_base_size = server.aof_current_size;
|
||||
server.aof_fsync_offset = server.aof_current_size;
|
||||
server.aof_last_fsync = server.unixtime;
|
||||
}
|
||||
|
||||
/* We don't care about the return value of `aofDelHistoryFiles`, because the history
|
||||
|
|
|
@ -2018,6 +2018,7 @@ void initServerConfig(void) {
|
|||
server.aof_selected_db = -1; /* Make sure the first time will not match */
|
||||
server.aof_flush_postponed_start = 0;
|
||||
server.aof_last_incr_size = 0;
|
||||
server.aof_last_incr_fsync_offset = 0;
|
||||
server.active_defrag_running = 0;
|
||||
server.notify_keyspace_events = 0;
|
||||
server.blocked_clients = 0;
|
||||
|
|
|
@ -1742,7 +1742,8 @@ struct redisServer {
|
|||
off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */
|
||||
off_t aof_current_size; /* AOF current size (Including BASE + INCRs). */
|
||||
off_t aof_last_incr_size; /* The size of the latest incr AOF. */
|
||||
off_t aof_fsync_offset; /* AOF offset which is already synced to disk. */
|
||||
off_t aof_last_incr_fsync_offset; /* AOF offset which is already requested to be synced to disk.
|
||||
* Compare with the aof_last_incr_size. */
|
||||
int aof_flush_sleep; /* Micros to sleep before flush. (used by tests) */
|
||||
int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */
|
||||
sds aof_buf; /* AOF buffer, written before entering the event loop */
|
||||
|
|
|
@ -175,10 +175,52 @@ tags {"wait aof network external:skip"} {
|
|||
$replica config set appendfsync everysec
|
||||
|
||||
test {WAITAOF replica copy everysec} {
|
||||
$replica config set appendfsync everysec
|
||||
waitForBgrewriteaof $replica ;# Make sure there is no AOFRW
|
||||
|
||||
$master incr foo
|
||||
assert_equal [$master waitaof 0 1 0] {1 1}
|
||||
}
|
||||
|
||||
test {WAITAOF replica copy everysec with AOFRW} {
|
||||
$replica config set appendfsync everysec
|
||||
|
||||
# When we trigger an AOFRW, a fsync is triggered when closing the old INCR file,
|
||||
# so with the everysec, we will skip that second of fsync, and in the next second
|
||||
# after that, we will eventually do the fsync.
|
||||
$replica bgrewriteaof
|
||||
waitForBgrewriteaof $replica
|
||||
|
||||
$master incr foo
|
||||
assert_equal [$master waitaof 0 1 0] {1 1}
|
||||
}
|
||||
|
||||
test {WAITAOF replica copy everysec with slow AOFRW} {
|
||||
$replica config set appendfsync everysec
|
||||
$replica config set rdb-key-save-delay 1000000 ;# 1 sec
|
||||
|
||||
$replica bgrewriteaof
|
||||
|
||||
$master incr foo
|
||||
assert_equal [$master waitaof 0 1 0] {1 1}
|
||||
|
||||
$replica config set rdb-key-save-delay 0
|
||||
waitForBgrewriteaof $replica
|
||||
}
|
||||
|
||||
test {WAITAOF replica copy everysec->always with AOFRW} {
|
||||
$replica config set appendfsync everysec
|
||||
|
||||
# Try to fit all of them in the same round second, although there's no way to guarantee
|
||||
# that, it can be done on fast machine. In any case, the test shouldn't fail either.
|
||||
$replica bgrewriteaof
|
||||
$master incr foo
|
||||
waitForBgrewriteaof $replica
|
||||
$replica config set appendfsync always
|
||||
|
||||
assert_equal [$master waitaof 0 1 0] {1 1}
|
||||
}
|
||||
|
||||
test {WAITAOF replica copy appendfsync always} {
|
||||
$replica config set appendfsync always
|
||||
$master incr foo
|
||||
|
|
Loading…
Reference in New Issue