mirror of https://mirror.osredm.com/root/redis.git
Fixes around AOF failed rewrite rate limiting (#10582)
Changes: 1. Check the failed-rewrite time threshold only when we are actually about to trigger a rewrite, i.e. it must be the last condition tested, because the check has side effects (it increases the time threshold); with the old ordering those side effects could occur in some rare scenarios even when no rewrite was going to be triggered. 2. Do not apply the limit in the startup state (e.g. after restarting a Redis instance that previously failed and had many incr files). 3. Record the “triggered the limit” log message only when the limited status is actually returned. 4. Remove the failure count from the log message (it could be misleading in some cases). Co-authored-by: chenyang8094 <chenyang8094@users.noreply.github.com> Co-authored-by: Oran Agra <oran@redislabs.com>
This commit is contained in:
parent
1a93804645
commit
d4cbd8140b
53
src/aof.c
53
src/aof.c
|
@ -815,38 +815,39 @@ int openNewIncrAofForAppend(void) {
|
||||||
#define AOF_REWRITE_LIMITE_THRESHOLD 3
|
#define AOF_REWRITE_LIMITE_THRESHOLD 3
|
||||||
#define AOF_REWRITE_LIMITE_MAX_MINUTES 60 /* 1 hour */
|
#define AOF_REWRITE_LIMITE_MAX_MINUTES 60 /* 1 hour */
|
||||||
int aofRewriteLimited(void) {
|
int aofRewriteLimited(void) {
|
||||||
int limit = 0;
|
static int next_delay_minutes = 0;
|
||||||
static int limit_delay_minutes = 0;
|
|
||||||
static time_t next_rewrite_time = 0;
|
static time_t next_rewrite_time = 0;
|
||||||
|
|
||||||
|
/* If the number of incr AOFs exceeds the threshold but server.aof_lastbgrewrite_status is OK, it
|
||||||
|
* means that redis may have just loaded a dataset containing many incr AOFs. At this time, we
|
||||||
|
* will not limit the AOFRW. */
|
||||||
unsigned long incr_aof_num = listLength(server.aof_manifest->incr_aof_list);
|
unsigned long incr_aof_num = listLength(server.aof_manifest->incr_aof_list);
|
||||||
if (incr_aof_num >= AOF_REWRITE_LIMITE_THRESHOLD) {
|
if (incr_aof_num < AOF_REWRITE_LIMITE_THRESHOLD || server.aof_lastbgrewrite_status == C_OK) {
|
||||||
if (server.unixtime < next_rewrite_time) {
|
/* We may be recovering from limited state, so reset all states. */
|
||||||
limit = 1;
|
next_delay_minutes = 0;
|
||||||
} else {
|
|
||||||
if (limit_delay_minutes == 0) {
|
|
||||||
limit = 1;
|
|
||||||
limit_delay_minutes = 1;
|
|
||||||
} else {
|
|
||||||
limit_delay_minutes *= 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (limit_delay_minutes > AOF_REWRITE_LIMITE_MAX_MINUTES) {
|
|
||||||
limit_delay_minutes = AOF_REWRITE_LIMITE_MAX_MINUTES;
|
|
||||||
}
|
|
||||||
|
|
||||||
next_rewrite_time = server.unixtime + limit_delay_minutes * 60;
|
|
||||||
|
|
||||||
serverLog(LL_WARNING,
|
|
||||||
"Background AOF rewrite has repeatedly failed %ld times and triggered the limit, will retry in %d minutes",
|
|
||||||
incr_aof_num, limit_delay_minutes);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
limit_delay_minutes = 0;
|
|
||||||
next_rewrite_time = 0;
|
next_rewrite_time = 0;
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return limit;
|
/* if it is in the limiting state, then check if the next_rewrite_time is reached */
|
||||||
|
if (next_rewrite_time != 0) {
|
||||||
|
if (server.unixtime < next_rewrite_time) {
|
||||||
|
return 1;
|
||||||
|
} else {
|
||||||
|
next_rewrite_time = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
next_delay_minutes = (next_delay_minutes == 0) ? 1 : (next_delay_minutes * 2);
|
||||||
|
if (next_delay_minutes > AOF_REWRITE_LIMITE_MAX_MINUTES) {
|
||||||
|
next_delay_minutes = AOF_REWRITE_LIMITE_MAX_MINUTES;
|
||||||
|
}
|
||||||
|
|
||||||
|
next_rewrite_time = server.unixtime + next_delay_minutes * 60;
|
||||||
|
serverLog(LL_WARNING,
|
||||||
|
"Background AOF rewrite has repeatedly failed and triggered the limit, will retry in %d minutes", next_delay_minutes);
|
||||||
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------------
|
/* ----------------------------------------------------------------------------
|
||||||
|
|
|
@ -1296,13 +1296,12 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
|
||||||
if (server.aof_state == AOF_ON &&
|
if (server.aof_state == AOF_ON &&
|
||||||
!hasActiveChildProcess() &&
|
!hasActiveChildProcess() &&
|
||||||
server.aof_rewrite_perc &&
|
server.aof_rewrite_perc &&
|
||||||
server.aof_current_size > server.aof_rewrite_min_size &&
|
server.aof_current_size > server.aof_rewrite_min_size)
|
||||||
!aofRewriteLimited())
|
|
||||||
{
|
{
|
||||||
long long base = server.aof_rewrite_base_size ?
|
long long base = server.aof_rewrite_base_size ?
|
||||||
server.aof_rewrite_base_size : 1;
|
server.aof_rewrite_base_size : 1;
|
||||||
long long growth = (server.aof_current_size*100/base) - 100;
|
long long growth = (server.aof_current_size*100/base) - 100;
|
||||||
if (growth >= server.aof_rewrite_perc) {
|
if (growth >= server.aof_rewrite_perc && !aofRewriteLimited()) {
|
||||||
serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
|
serverLog(LL_NOTICE,"Starting automatic rewriting of AOF on %lld%% growth",growth);
|
||||||
rewriteAppendOnlyFileBackground();
|
rewriteAppendOnlyFileBackground();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue