Fix wrongly updating fsynced_reploff_pending when appendfsync=everysecond (#13793)

```
if (server.aof_fsync == AOF_FSYNC_EVERYSEC &&
    server.aof_last_incr_fsync_offset != server.aof_last_incr_size &&
    server.mstime - server.aof_last_fsync >= 1000 &&
    !(sync_in_progress = aofFsyncInProgress())) {
    goto try_fsync;
```
In https://github.com/redis/redis/pull/12622, when when
appendfsync=everysecond, if redis has written some data to AOF but not
`fsync`, and less than 1 second has passed since the last `fsync `,
redis will won't fsync AOF, but we will update `
fsynced_reploff_pending`, so it cause the `WAITAOF` to return
prematurely.

this bug is introduced in https://github.com/redis/redis/pull/12622,
from 7.4

The bug fix
1bd6688bca
is just as follows:
```diff
diff --git a/src/aof.c b/src/aof.c
index 8ccd8d8f8..521b30449 100644
--- a/src/aof.c
+++ b/src/aof.c
@@ -1096,8 +1096,11 @@ void flushAppendOnlyFile(int force) {
              * in which case master_repl_offset will increase but fsynced_reploff_pending won't be updated
              * (because there's no reason, from the AOF POV, to call fsync) and then WAITAOF may wait on
              * the higher offset (which contains data that was only propagated to replicas, and not to AOF) */
-            if (!sync_in_progress && server.aof_fsync != AOF_FSYNC_NO)
+            if (server.aof_last_incr_fsync_offset == server.aof_last_incr_size &&
+                !(sync_in_progress = aofFsyncInProgress()))
+            {
                 atomicSet(server.fsynced_reploff_pending, server.master_repl_offset);
+            }
             return;
```
Additionally, we slightly refactored fsync AOF to make it simpler, as
584f008d1c
This commit is contained in:
Yuan Wang 2025-02-13 10:48:29 +08:00 committed by GitHub
parent 1583d60cd6
commit 87124a38b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 25 additions and 26 deletions

View File

@ -1071,35 +1071,34 @@ void flushAppendOnlyFile(int force) {
mstime_t latency;
if (sdslen(server.aof_buf) == 0) {
/* Check if we need to do fsync even the aof buffer is empty,
* because previously in AOF_FSYNC_EVERYSEC mode, fsync is
* called only when aof buffer is not empty, so if users
* stop write commands before fsync called in one second,
* the data in page cache cannot be flushed in time. */
if (server.aof_fsync == AOF_FSYNC_EVERYSEC &&
server.aof_last_incr_fsync_offset != server.aof_last_incr_size &&
server.mstime - server.aof_last_fsync >= 1000 &&
!(sync_in_progress = aofFsyncInProgress())) {
goto try_fsync;
/* Check if we need to do fsync even the aof buffer is empty,
* the reason is described in the previous AOF_FSYNC_EVERYSEC block,
* and AOF_FSYNC_ALWAYS is also checked here to handle a case where
* aof_fsync is changed from everysec to always. */
} else if (server.aof_fsync == AOF_FSYNC_ALWAYS &&
server.aof_last_incr_fsync_offset != server.aof_last_incr_size)
{
goto try_fsync;
} else {
if (server.aof_last_incr_fsync_offset == server.aof_last_incr_size) {
/* All data is fsync'd already: Update fsynced_reploff_pending just in case.
* This is needed to avoid a WAITAOF hang in case a module used RM_Call with the NO_AOF flag,
* in which case master_repl_offset will increase but fsynced_reploff_pending won't be updated
* (because there's no reason, from the AOF POV, to call fsync) and then WAITAOF may wait on
* the higher offset (which contains data that was only propagated to replicas, and not to AOF) */
if (!sync_in_progress && server.aof_fsync != AOF_FSYNC_NO)
* This is needed to avoid a WAITAOF hang in case a module used RM_Call
* with the NO_AOF flag, in which case master_repl_offset will increase but
* fsynced_reploff_pending won't be updated (because there's no reason, from
* the AOF POV, to call fsync) and then WAITAOF may wait on the higher offset
* (which contains data that was only propagated to replicas, and not to AOF) */
if (!aofFsyncInProgress())
atomicSet(server.fsynced_reploff_pending, server.master_repl_offset);
return;
} else {
/* Check if we need to do fsync even the aof buffer is empty,
* because previously in AOF_FSYNC_EVERYSEC mode, fsync is
* called only when aof buffer is not empty, so if users
* stop write commands before fsync called in one second,
* the data in page cache cannot be flushed in time. */
if (server.aof_fsync == AOF_FSYNC_EVERYSEC &&
server.mstime - server.aof_last_fsync >= 1000 &&
!(sync_in_progress = aofFsyncInProgress()))
goto try_fsync;
/* Check if we need to do fsync even the aof buffer is empty,
* the reason is described in the previous AOF_FSYNC_EVERYSEC block,
* and AOF_FSYNC_ALWAYS is also checked here to handle a case where
* aof_fsync is changed from everysec to always. */
if (server.aof_fsync == AOF_FSYNC_ALWAYS)
goto try_fsync;
}
return;
}
if (server.aof_fsync == AOF_FSYNC_EVERYSEC)