Merge remote-tracking branch 'upstream/unstable' into HEAD

This commit is contained in:
YaacovHazan 2025-01-14 14:01:19 +02:00
commit 9c81f8bd61
115 changed files with 8217 additions and 1635 deletions

View File

@ -76,7 +76,6 @@ jobs:
if: | if: |
(github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) && (github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) &&
!contains(github.event.inputs.skipjobs, 'fortify') !contains(github.event.inputs.skipjobs, 'fortify')
container: ubuntu:lunar
timeout-minutes: 14400 timeout-minutes: 14400
steps: steps:
- name: prep - name: prep
@ -94,12 +93,10 @@ jobs:
ref: ${{ env.GITHUB_HEAD_REF }} ref: ${{ env.GITHUB_HEAD_REF }}
- name: make - name: make
run: | run: |
apt-get update && apt-get install -y make gcc-13 g++-13 apt-get update && apt-get install -y make gcc g++
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-13 100
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-13 100
make CC=gcc REDIS_CFLAGS='-Werror -DREDIS_TEST -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3' make CC=gcc REDIS_CFLAGS='-Werror -DREDIS_TEST -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3'
- name: testprep - name: testprep
run: apt-get install -y tcl8.6 tclx procps run: sudo apt-get install -y tcl8.6 tclx procps
- name: test - name: test
if: true && !contains(github.event.inputs.skiptests, 'redis') if: true && !contains(github.event.inputs.skiptests, 'redis')
run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}} run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}}
@ -876,7 +873,7 @@ jobs:
build-macos: build-macos:
strategy: strategy:
matrix: matrix:
os: [macos-12, macos-14] os: [macos-13, macos-15]
runs-on: ${{ matrix.os }} runs-on: ${{ matrix.os }}
if: | if: |
(github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) && (github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) &&
@ -903,7 +900,7 @@ jobs:
run: make REDIS_CFLAGS='-Werror -DREDIS_TEST' run: make REDIS_CFLAGS='-Werror -DREDIS_TEST'
test-freebsd: test-freebsd:
runs-on: macos-12 runs-on: macos-13
if: | if: |
(github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) && (github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) &&
!contains(github.event.inputs.skipjobs, 'freebsd') !contains(github.event.inputs.skipjobs, 'freebsd')

View File

@ -15,7 +15,8 @@ Another good example is to think of Redis as a more complex version of memcached
If you want to know more, this is a list of selected starting points: If you want to know more, this is a list of selected starting points:
* Introduction to Redis data types. https://redis.io/topics/data-types-intro * Introduction to Redis data types. https://redis.io/docs/latest/develop/data-types/
* The full list of Redis commands. https://redis.io/commands * The full list of Redis commands. https://redis.io/commands
* There is much more inside the official Redis documentation. https://redis.io/documentation * There is much more inside the official Redis documentation. https://redis.io/documentation
@ -493,7 +494,7 @@ Other C files
* `dict.c` is an implementation of a non-blocking hash table which rehashes incrementally. * `dict.c` is an implementation of a non-blocking hash table which rehashes incrementally.
* `cluster.c` implements the Redis Cluster. Probably a good read only after being very familiar with the rest of the Redis code base. If you want to read `cluster.c` make sure to read the [Redis Cluster specification][4]. * `cluster.c` implements the Redis Cluster. Probably a good read only after being very familiar with the rest of the Redis code base. If you want to read `cluster.c` make sure to read the [Redis Cluster specification][4].
[4]: https://redis.io/topics/cluster-spec [4]: https://redis.io/docs/latest/operate/oss_and_stack/reference/cluster-spec/
Anatomy of a Redis command Anatomy of a Redis command
--- ---

View File

@ -478,7 +478,7 @@ static int __redisGetSubscribeCallback(redisAsyncContext *ac, redisReply *reply,
/* Match reply with the expected format of a pushed message. /* Match reply with the expected format of a pushed message.
* The type and number of elements (3 to 4) are specified at: * The type and number of elements (3 to 4) are specified at:
* https://redis.io/topics/pubsub#format-of-pushed-messages */ * https://redis.io/docs/latest/develop/interact/pubsub/#format-of-pushed-messages */
if ((reply->type == REDIS_REPLY_ARRAY && !(c->flags & REDIS_SUPPORTS_PUSH) && reply->elements >= 3) || if ((reply->type == REDIS_REPLY_ARRAY && !(c->flags & REDIS_SUPPORTS_PUSH) && reply->elements >= 3) ||
reply->type == REDIS_REPLY_PUSH) { reply->type == REDIS_REPLY_PUSH) {
assert(reply->element[0]->type == REDIS_REPLY_STRING); assert(reply->element[0]->type == REDIS_REPLY_STRING);

View File

@ -727,6 +727,24 @@ repl-disable-tcp-nodelay no
# #
# repl-backlog-ttl 3600 # repl-backlog-ttl 3600
# During a fullsync, the master may decide to send both the RDB file and the
# replication stream to the replica in parallel. This approach shifts the
# responsibility of buffering the replication stream to the replica during the
# fullsync process. The replica accumulates the replication stream data until
# the RDB file is fully loaded. Once the RDB delivery is completed and
# successfully loaded, the replica begins processing and applying the
# accumulated replication data to the db. The configuration below controls how
# much replication data the replica can accumulate during a fullsync.
#
# When the replica reaches this limit, it will stop accumulating further data.
# At this point, additional data accumulation may occur on the master side
# depending on the 'client-output-buffer-limit <replica>' config of master.
#
# A value of 0 means replica inherits hard limit of
# 'client-output-buffer-limit <replica>' config to limit accumulation size.
#
# replica-full-sync-buffer-limit 0
# The replica priority is an integer number published by Redis in the INFO # The replica priority is an integer number published by Redis in the INFO
# output. It is used by Redis Sentinel in order to select a replica to promote # output. It is used by Redis Sentinel in order to select a replica to promote
# into a master if the master is no longer working correctly. # into a master if the master is no longer working correctly.
@ -838,7 +856,7 @@ replica-priority 100
# this is used in order to send invalidation messages to clients. Please # this is used in order to send invalidation messages to clients. Please
# check this page to understand more about the feature: # check this page to understand more about the feature:
# #
# https://redis.io/topics/client-side-caching # https://redis.io/docs/latest/develop/use/client-side-caching/
# #
# When tracking is enabled for a client, all the read only queries are assumed # When tracking is enabled for a client, all the read only queries are assumed
# to be cached: this will force Redis to store information in the invalidation # to be cached: this will force Redis to store information in the invalidation
@ -1016,7 +1034,7 @@ replica-priority 100
# * stream - Data type: streams related. # * stream - Data type: streams related.
# #
# For more information about ACL configuration please refer to # For more information about ACL configuration please refer to
# the Redis web site at https://redis.io/topics/acl # the Redis web site at https://redis.io/docs/latest/operate/oss_and_stack/management/security/acl/
# ACL LOG # ACL LOG
# #
@ -1291,38 +1309,27 @@ lazyfree-lazy-user-flush no
# in different I/O threads. Since especially writing is so slow, normally # in different I/O threads. Since especially writing is so slow, normally
# Redis users use pipelining in order to speed up the Redis performances per # Redis users use pipelining in order to speed up the Redis performances per
# core, and spawn multiple instances in order to scale more. Using I/O # core, and spawn multiple instances in order to scale more. Using I/O
# threads it is possible to easily speedup two times Redis without resorting # threads it is possible to easily speedup several times Redis without resorting
# to pipelining nor sharding of the instance. # to pipelining nor sharding of the instance.
# #
# By default threading is disabled, we suggest enabling it only in machines # By default threading is disabled, we suggest enabling it only in machines
# that have at least 4 or more cores, leaving at least one spare core. # that have at least 4 or more cores, leaving at least one spare core.
# Using more than 8 threads is unlikely to help much. We also recommend using # We also recommend using threaded I/O only if you actually have performance
# threaded I/O only if you actually have performance problems, with Redis # problems, with Redis instances being able to use a quite big percentage of
# instances being able to use a quite big percentage of CPU time, otherwise # CPU time, otherwise there is no point in using this feature.
# there is no point in using this feature.
# #
# So for instance if you have a four cores boxes, try to use 2 or 3 I/O # So for instance if you have a four cores boxes, try to use 3 I/O
# threads, if you have a 8 cores, try to use 6 threads. In order to # threads, if you have a 8 cores, try to use 7 threads. In order to
# enable I/O threads use the following configuration directive: # enable I/O threads use the following configuration directive:
# #
# io-threads 4 # io-threads 4
# #
# Setting io-threads to 1 will just use the main thread as usual. # Setting io-threads to 1 will just use the main thread as usual.
# When I/O threads are enabled, we only use threads for writes, that is # When I/O threads are enabled, we not only use threads for writes, that
# to thread the write(2) syscall and transfer the client buffers to the # is to thread the write(2) syscall and transfer the client buffers to the
# socket. However it is also possible to enable threading of reads and # socket, but also use threads for reads and protocol parsing.
# protocol parsing using the following configuration directive, by setting
# it to yes:
# #
# io-threads-do-reads no # NOTE: If you want to test the Redis speedup using redis-benchmark, make
#
# Usually threading reads doesn't help much.
#
# NOTE 1: This configuration directive cannot be changed at runtime via
# CONFIG SET. Also, this feature currently does not work when SSL is
# enabled.
#
# NOTE 2: If you want to test the Redis speedup using redis-benchmark, make
# sure you also run the benchmark itself in threaded mode, using the # sure you also run the benchmark itself in threaded mode, using the
# --threads option to match the number of Redis threads, otherwise you'll not # --threads option to match the number of Redis threads, otherwise you'll not
# be able to notice the improvements. # be able to notice the improvements.
@ -1362,7 +1369,7 @@ oom-score-adj-values 0 200 800
#################### KERNEL transparent hugepage CONTROL ###################### #################### KERNEL transparent hugepage CONTROL ######################
# Usually the kernel Transparent Huge Pages control is set to "madvise" or # Usually the kernel Transparent Huge Pages control is set to "madvise" or
# or "never" by default (/sys/kernel/mm/transparent_hugepage/enabled), in which # "never" by default (/sys/kernel/mm/transparent_hugepage/enabled), in which
# case this config has no effect. On systems in which it is set to "always", # case this config has no effect. On systems in which it is set to "always",
# redis will attempt to disable it specifically for the redis process in order # redis will attempt to disable it specifically for the redis process in order
# to avoid latency problems specifically with fork(2) and CoW. # to avoid latency problems specifically with fork(2) and CoW.
@ -1393,7 +1400,7 @@ disable-thp yes
# restarting the server can lead to data loss. A conversion needs to be done # restarting the server can lead to data loss. A conversion needs to be done
# by setting it via CONFIG command on a live server first. # by setting it via CONFIG command on a live server first.
# #
# Please check https://redis.io/topics/persistence for more information. # Please check https://redis.io/docs/latest/operate/oss_and_stack/management/persistence/ for more information.
appendonly no appendonly no
@ -1880,7 +1887,7 @@ latency-monitor-threshold 0
############################# EVENT NOTIFICATION ############################## ############################# EVENT NOTIFICATION ##############################
# Redis can notify Pub/Sub clients about events happening in the key space. # Redis can notify Pub/Sub clients about events happening in the key space.
# This feature is documented at https://redis.io/topics/notifications # This feature is documented at https://redis.io/docs/latest/develop/use/keyspace-notifications/
# #
# For instance if keyspace events notification is enabled, and a client # For instance if keyspace events notification is enabled, and a client
# performs a DEL operation on key "foo" stored in the Database 0, two # performs a DEL operation on key "foo" stored in the Database 0, two

View File

@ -133,7 +133,7 @@ sentinel monitor mymaster 127.0.0.1 6379 2
sentinel down-after-milliseconds mymaster 30000 sentinel down-after-milliseconds mymaster 30000
# IMPORTANT NOTE: starting with Redis 6.2 ACL capability is supported for # IMPORTANT NOTE: starting with Redis 6.2 ACL capability is supported for
# Sentinel mode, please refer to the Redis website https://redis.io/topics/acl # Sentinel mode, please refer to the Redis website https://redis.io/docs/latest/operate/oss_and_stack/management/security/acl/
# for more details. # for more details.
# Sentinel's ACL users are defined in the following format: # Sentinel's ACL users are defined in the following format:
@ -145,7 +145,7 @@ sentinel down-after-milliseconds mymaster 30000
# user worker +@admin +@connection ~* on >ffa9203c493aa99 # user worker +@admin +@connection ~* on >ffa9203c493aa99
# #
# For more information about ACL configuration please refer to the Redis # For more information about ACL configuration please refer to the Redis
# website at https://redis.io/topics/acl and redis server configuration # website at https://redis.io/docs/latest/operate/oss_and_stack/management/security/acl/ and redis server configuration
# template redis.conf. # template redis.conf.
# ACL LOG # ACL LOG
@ -174,7 +174,7 @@ acllog-max-len 128
# so Sentinel will try to authenticate with the same password to all the # so Sentinel will try to authenticate with the same password to all the
# other Sentinels. So you need to configure all your Sentinels in a given # other Sentinels. So you need to configure all your Sentinels in a given
# group with the same "requirepass" password. Check the following documentation # group with the same "requirepass" password. Check the following documentation
# for more info: https://redis.io/topics/sentinel # for more info: https://redis.io/docs/latest/operate/oss_and_stack/management/sentinel/
# #
# IMPORTANT NOTE: starting with Redis 6.2 "requirepass" is a compatibility # IMPORTANT NOTE: starting with Redis 6.2 "requirepass" is a compatibility
# layer on top of the ACL system. The option effect will be just setting # layer on top of the ACL system. The option effect will be just setting

View File

@ -354,11 +354,11 @@ endif
REDIS_SERVER_NAME=redis-server$(PROG_SUFFIX) REDIS_SERVER_NAME=redis-server$(PROG_SUFFIX)
REDIS_SENTINEL_NAME=redis-sentinel$(PROG_SUFFIX) REDIS_SENTINEL_NAME=redis-sentinel$(PROG_SUFFIX)
REDIS_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o mstr.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o REDIS_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o eventnotifier.o iothread.o mstr.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o
REDIS_CLI_NAME=redis-cli$(PROG_SUFFIX) REDIS_CLI_NAME=redis-cli$(PROG_SUFFIX)
REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o redisassert.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o redisassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o
REDIS_BENCHMARK_NAME=redis-benchmark$(PROG_SUFFIX) REDIS_BENCHMARK_NAME=redis-benchmark$(PROG_SUFFIX)
REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o redisassert.o release.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o redisassert.o release.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o
REDIS_CHECK_RDB_NAME=redis-check-rdb$(PROG_SUFFIX) REDIS_CHECK_RDB_NAME=redis-check-rdb$(PROG_SUFFIX)
REDIS_CHECK_AOF_NAME=redis-check-aof$(PROG_SUFFIX) REDIS_CHECK_AOF_NAME=redis-check-aof$(PROG_SUFFIX)
ALL_SOURCES=$(sort $(patsubst %.o,%.c,$(REDIS_SERVER_OBJ) $(REDIS_CLI_OBJ) $(REDIS_BENCHMARK_OBJ))) ALL_SOURCES=$(sort $(patsubst %.o,%.c,$(REDIS_SERVER_OBJ) $(REDIS_CLI_OBJ) $(REDIS_BENCHMARK_OBJ)))

View File

@ -277,7 +277,7 @@ int ACLListMatchSds(void *a, void *b) {
/* Method to free list elements from ACL users password/patterns lists. */ /* Method to free list elements from ACL users password/patterns lists. */
void ACLListFreeSds(void *item) { void ACLListFreeSds(void *item) {
sdsfree(item); sdsfreegeneric(item);
} }
/* Method to duplicate list elements from ACL users password/patterns lists. */ /* Method to duplicate list elements from ACL users password/patterns lists. */
@ -469,6 +469,11 @@ void ACLFreeUser(user *u) {
zfree(u); zfree(u);
} }
/* Generic version of ACLFreeUser. */
void ACLFreeUserGeneric(void *u) {
ACLFreeUser((user *)u);
}
/* When a user is deleted we need to cycle the active /* When a user is deleted we need to cycle the active
* connections in order to kill all the pending ones that * connections in order to kill all the pending ones that
* are authenticated with such user. */ * are authenticated with such user. */
@ -1061,19 +1066,24 @@ int ACLSetSelector(aclSelector *selector, const char* op, size_t oplen) {
int flags = 0; int flags = 0;
size_t offset = 1; size_t offset = 1;
if (op[0] == '%') { if (op[0] == '%') {
int perm_ok = 1;
for (; offset < oplen; offset++) { for (; offset < oplen; offset++) {
if (toupper(op[offset]) == 'R' && !(flags & ACL_READ_PERMISSION)) { if (toupper(op[offset]) == 'R' && !(flags & ACL_READ_PERMISSION)) {
flags |= ACL_READ_PERMISSION; flags |= ACL_READ_PERMISSION;
} else if (toupper(op[offset]) == 'W' && !(flags & ACL_WRITE_PERMISSION)) { } else if (toupper(op[offset]) == 'W' && !(flags & ACL_WRITE_PERMISSION)) {
flags |= ACL_WRITE_PERMISSION; flags |= ACL_WRITE_PERMISSION;
} else if (op[offset] == '~' && flags) { } else if (op[offset] == '~') {
offset++; offset++;
break; break;
} else { } else {
errno = EINVAL; perm_ok = 0;
return C_ERR; break;
} }
} }
if (!flags || !perm_ok) {
errno = EINVAL;
return C_ERR;
}
} else { } else {
flags = ACL_ALL_PERMISSION; flags = ACL_ALL_PERMISSION;
} }
@ -1577,14 +1587,22 @@ static int ACLSelectorCheckKey(aclSelector *selector, const char *key, int keyle
if (keyspec_flags & CMD_KEY_DELETE) key_flags |= ACL_WRITE_PERMISSION; if (keyspec_flags & CMD_KEY_DELETE) key_flags |= ACL_WRITE_PERMISSION;
if (keyspec_flags & CMD_KEY_UPDATE) key_flags |= ACL_WRITE_PERMISSION; if (keyspec_flags & CMD_KEY_UPDATE) key_flags |= ACL_WRITE_PERMISSION;
/* Is given key represent a prefix of a set of keys */
int prefix = keyspec_flags & CMD_KEY_PREFIX;
/* Test this key against every pattern. */ /* Test this key against every pattern. */
while((ln = listNext(&li))) { while((ln = listNext(&li))) {
keyPattern *pattern = listNodeValue(ln); keyPattern *pattern = listNodeValue(ln);
if ((pattern->flags & key_flags) != key_flags) if ((pattern->flags & key_flags) != key_flags)
continue; continue;
size_t plen = sdslen(pattern->pattern); size_t plen = sdslen(pattern->pattern);
if (stringmatchlen(pattern->pattern,plen,key,keylen,0)) if (prefix) {
return ACL_OK; if (prefixmatch(pattern->pattern,plen,key,keylen,0))
return ACL_OK;
} else {
if (stringmatchlen(pattern->pattern, plen, key, keylen, 0))
return ACL_OK;
}
} }
return ACL_DENIED_KEY; return ACL_DENIED_KEY;
} }
@ -2446,12 +2464,12 @@ sds ACLLoadFromFile(const char *filename) {
} }
if (user_channels) if (user_channels)
raxFreeWithCallback(user_channels, (void(*)(void*))listRelease); raxFreeWithCallback(user_channels, listReleaseGeneric);
raxFreeWithCallback(old_users,(void(*)(void*))ACLFreeUser); raxFreeWithCallback(old_users, ACLFreeUserGeneric);
sdsfree(errors); sdsfree(errors);
return NULL; return NULL;
} else { } else {
raxFreeWithCallback(Users,(void(*)(void*))ACLFreeUser); raxFreeWithCallback(Users, ACLFreeUserGeneric);
Users = old_users; Users = old_users;
errors = sdscat(errors,"WARNING: ACL errors detected, no change to the previously active ACL rules was performed"); errors = sdscat(errors,"WARNING: ACL errors detected, no change to the previously active ACL rules was performed");
return errors; return errors;

View File

@ -61,6 +61,11 @@ void listRelease(list *list)
zfree(list); zfree(list);
} }
/* Generic version of listRelease. */
void listReleaseGeneric(void *list) {
listRelease((struct list*)list);
}
/* Add a new node to the list, to head, containing the specified 'value' /* Add a new node to the list, to head, containing the specified 'value'
* pointer as value. * pointer as value.
* *

View File

@ -51,6 +51,7 @@ typedef struct list {
/* Prototypes */ /* Prototypes */
list *listCreate(void); list *listCreate(void);
void listRelease(list *list); void listRelease(list *list);
void listReleaseGeneric(void *list);
void listEmpty(list *list); void listEmpty(list *list);
list *listAddNodeHead(list *list, void *value); list *listAddNodeHead(list *list, void *value);
list *listAddNodeTail(list *list, void *value); list *listAddNodeTail(list *list, void *value);

View File

@ -42,7 +42,7 @@
#endif #endif
#endif #endif
#define INITIAL_EVENT 1024
aeEventLoop *aeCreateEventLoop(int setsize) { aeEventLoop *aeCreateEventLoop(int setsize) {
aeEventLoop *eventLoop; aeEventLoop *eventLoop;
int i; int i;
@ -50,8 +50,9 @@ aeEventLoop *aeCreateEventLoop(int setsize) {
monotonicInit(); /* just in case the calling app didn't initialize */ monotonicInit(); /* just in case the calling app didn't initialize */
if ((eventLoop = zmalloc(sizeof(*eventLoop))) == NULL) goto err; if ((eventLoop = zmalloc(sizeof(*eventLoop))) == NULL) goto err;
eventLoop->events = zmalloc(sizeof(aeFileEvent)*setsize); eventLoop->nevents = setsize < INITIAL_EVENT ? setsize : INITIAL_EVENT;
eventLoop->fired = zmalloc(sizeof(aeFiredEvent)*setsize); eventLoop->events = zmalloc(sizeof(aeFileEvent)*eventLoop->nevents);
eventLoop->fired = zmalloc(sizeof(aeFiredEvent)*eventLoop->nevents);
if (eventLoop->events == NULL || eventLoop->fired == NULL) goto err; if (eventLoop->events == NULL || eventLoop->fired == NULL) goto err;
eventLoop->setsize = setsize; eventLoop->setsize = setsize;
eventLoop->timeEventHead = NULL; eventLoop->timeEventHead = NULL;
@ -61,10 +62,11 @@ aeEventLoop *aeCreateEventLoop(int setsize) {
eventLoop->beforesleep = NULL; eventLoop->beforesleep = NULL;
eventLoop->aftersleep = NULL; eventLoop->aftersleep = NULL;
eventLoop->flags = 0; eventLoop->flags = 0;
memset(eventLoop->privdata, 0, sizeof(eventLoop->privdata));
if (aeApiCreate(eventLoop) == -1) goto err; if (aeApiCreate(eventLoop) == -1) goto err;
/* Events with mask == AE_NONE are not set. So let's initialize the /* Events with mask == AE_NONE are not set. So let's initialize the
* vector with it. */ * vector with it. */
for (i = 0; i < setsize; i++) for (i = 0; i < eventLoop->nevents; i++)
eventLoop->events[i].mask = AE_NONE; eventLoop->events[i].mask = AE_NONE;
return eventLoop; return eventLoop;
@ -102,20 +104,19 @@ void aeSetDontWait(aeEventLoop *eventLoop, int noWait) {
* *
* Otherwise AE_OK is returned and the operation is successful. */ * Otherwise AE_OK is returned and the operation is successful. */
int aeResizeSetSize(aeEventLoop *eventLoop, int setsize) { int aeResizeSetSize(aeEventLoop *eventLoop, int setsize) {
int i;
if (setsize == eventLoop->setsize) return AE_OK; if (setsize == eventLoop->setsize) return AE_OK;
if (eventLoop->maxfd >= setsize) return AE_ERR; if (eventLoop->maxfd >= setsize) return AE_ERR;
if (aeApiResize(eventLoop,setsize) == -1) return AE_ERR; if (aeApiResize(eventLoop,setsize) == -1) return AE_ERR;
eventLoop->events = zrealloc(eventLoop->events,sizeof(aeFileEvent)*setsize);
eventLoop->fired = zrealloc(eventLoop->fired,sizeof(aeFiredEvent)*setsize);
eventLoop->setsize = setsize; eventLoop->setsize = setsize;
/* Make sure that if we created new slots, they are initialized with /* If the current allocated space is larger than the requested size,
* an AE_NONE mask. */ * we need to shrink it to the requested size. */
for (i = eventLoop->maxfd+1; i < setsize; i++) if (setsize < eventLoop->nevents) {
eventLoop->events[i].mask = AE_NONE; eventLoop->events = zrealloc(eventLoop->events,sizeof(aeFileEvent)*setsize);
eventLoop->fired = zrealloc(eventLoop->fired,sizeof(aeFiredEvent)*setsize);
eventLoop->nevents = setsize;
}
return AE_OK; return AE_OK;
} }
@ -147,6 +148,22 @@ int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
errno = ERANGE; errno = ERANGE;
return AE_ERR; return AE_ERR;
} }
/* Resize the events and fired arrays if the file
* descriptor exceeds the current number of events. */
if (unlikely(fd >= eventLoop->nevents)) {
int newnevents = eventLoop->nevents;
newnevents = (newnevents * 2 > fd + 1) ? newnevents * 2 : fd + 1;
newnevents = (newnevents > eventLoop->setsize) ? eventLoop->setsize : newnevents;
eventLoop->events = zrealloc(eventLoop->events, sizeof(aeFileEvent) * newnevents);
eventLoop->fired = zrealloc(eventLoop->fired, sizeof(aeFiredEvent) * newnevents);
/* Initialize new slots with an AE_NONE mask */
for (int i = eventLoop->nevents; i < newnevents; i++)
eventLoop->events[i].mask = AE_NONE;
eventLoop->nevents = newnevents;
}
aeFileEvent *fe = &eventLoop->events[fd]; aeFileEvent *fe = &eventLoop->events[fd];
if (aeApiAddEvent(eventLoop, fd, mask) == -1) if (aeApiAddEvent(eventLoop, fd, mask) == -1)

View File

@ -79,6 +79,7 @@ typedef struct aeEventLoop {
int maxfd; /* highest file descriptor currently registered */ int maxfd; /* highest file descriptor currently registered */
int setsize; /* max number of file descriptors tracked */ int setsize; /* max number of file descriptors tracked */
long long timeEventNextId; long long timeEventNextId;
int nevents; /* Size of Registered events */
aeFileEvent *events; /* Registered events */ aeFileEvent *events; /* Registered events */
aeFiredEvent *fired; /* Fired events */ aeFiredEvent *fired; /* Fired events */
aeTimeEvent *timeEventHead; aeTimeEvent *timeEventHead;
@ -87,6 +88,7 @@ typedef struct aeEventLoop {
aeBeforeSleepProc *beforesleep; aeBeforeSleepProc *beforesleep;
aeBeforeSleepProc *aftersleep; aeBeforeSleepProc *aftersleep;
int flags; int flags;
void *privdata[2];
} aeEventLoop; } aeEventLoop;
/* Prototypes */ /* Prototypes */

View File

@ -32,7 +32,7 @@
* (if the flag was 0 -> set to 1, if it's already 1 -> do nothing, but the final result is that the flag is set), * (if the flag was 0 -> set to 1, if it's already 1 -> do nothing, but the final result is that the flag is set),
* and also it has a full barrier (__sync_lock_test_and_set has acquire barrier). * and also it has a full barrier (__sync_lock_test_and_set has acquire barrier).
* *
* NOTE2: Unlike other atomic type, which aren't guaranteed to be lock free, c11 atmoic_flag does. * NOTE2: Unlike other atomic type, which aren't guaranteed to be lock free, c11 atomic_flag does.
* To check whether a type is lock free, atomic_is_lock_free() can be used. * To check whether a type is lock free, atomic_is_lock_free() can be used.
* It can be considered to limit the flag type to atomic_flag to improve performance. * It can be considered to limit the flag type to atomic_flag to improve performance.
* *

View File

@ -489,22 +489,27 @@ int getBitfieldTypeFromArgument(client *c, robj *o, int *sign, int *bits) {
* bits to a string object. The command creates or pad with zeroes the string * bits to a string object. The command creates or pad with zeroes the string
* so that the 'maxbit' bit can be addressed. The object is finally * so that the 'maxbit' bit can be addressed. The object is finally
* returned. Otherwise if the key holds a wrong type NULL is returned and * returned. Otherwise if the key holds a wrong type NULL is returned and
* an error is sent to the client. */ * an error is sent to the client.
robj *lookupStringForBitCommand(client *c, uint64_t maxbit, int *dirty) { *
* (Must provide all the arguments to the function)
*/
static robj *lookupStringForBitCommand(client *c, uint64_t maxbit,
size_t *strOldSize, size_t *strGrowSize)
{
size_t byte = maxbit >> 3; size_t byte = maxbit >> 3;
robj *o = lookupKeyWrite(c->db,c->argv[1]); robj *o = lookupKeyWrite(c->db,c->argv[1]);
if (checkType(c,o,OBJ_STRING)) return NULL; if (checkType(c,o,OBJ_STRING)) return NULL;
if (dirty) *dirty = 0;
if (o == NULL) { if (o == NULL) {
o = createObject(OBJ_STRING,sdsnewlen(NULL, byte+1)); o = createObject(OBJ_STRING,sdsnewlen(NULL, byte+1));
dbAdd(c->db,c->argv[1],o); dbAdd(c->db,c->argv[1],o);
if (dirty) *dirty = 1; *strGrowSize = byte + 1;
*strOldSize = 0;
} else { } else {
o = dbUnshareStringValue(c->db,c->argv[1],o); o = dbUnshareStringValue(c->db,c->argv[1],o);
size_t oldlen = sdslen(o->ptr); *strOldSize = sdslen(o->ptr);
o->ptr = sdsgrowzero(o->ptr,byte+1); o->ptr = sdsgrowzero(o->ptr,byte+1);
if (dirty && oldlen != sdslen(o->ptr)) *dirty = 1; *strGrowSize = sdslen(o->ptr) - *strOldSize;
} }
return o; return o;
} }
@ -561,8 +566,9 @@ void setbitCommand(client *c) {
return; return;
} }
int dirty; size_t strOldSize, strGrowSize;
if ((o = lookupStringForBitCommand(c,bitoffset,&dirty)) == NULL) return; if ((o = lookupStringForBitCommand(c,bitoffset,&strOldSize,&strGrowSize)) == NULL)
return;
/* Get current values */ /* Get current values */
byte = bitoffset >> 3; byte = bitoffset >> 3;
@ -573,7 +579,7 @@ void setbitCommand(client *c) {
/* Either it is newly created, changed length, or the bit changes before and after. /* Either it is newly created, changed length, or the bit changes before and after.
* Note that the bitval here is actually a decimal number. * Note that the bitval here is actually a decimal number.
* So we need to use `!!` to convert it to 0 or 1 for comparison. */ * So we need to use `!!` to convert it to 0 or 1 for comparison. */
if (dirty || (!!bitval != on)) { if (strGrowSize || (!!bitval != on)) {
/* Update byte with new bit value. */ /* Update byte with new bit value. */
byteval &= ~(1 << bit); byteval &= ~(1 << bit);
byteval |= ((on & 0x1) << bit); byteval |= ((on & 0x1) << bit);
@ -581,6 +587,13 @@ void setbitCommand(client *c) {
signalModifiedKey(c,c->db,c->argv[1]); signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id);
server.dirty++; server.dirty++;
/* If this is not a new key (old size not 0) and size changed, then
* update the keysizes histogram. Otherwise, the histogram already
* updated in lookupStringForBitCommand() by calling dbAdd(). */
if ((strOldSize > 0) && (strGrowSize != 0))
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_STRING,
strOldSize, strOldSize + strGrowSize);
} }
/* Return original value. */ /* Return original value. */
@ -1065,7 +1078,8 @@ struct bitfieldOp {
void bitfieldGeneric(client *c, int flags) { void bitfieldGeneric(client *c, int flags) {
robj *o; robj *o;
uint64_t bitoffset; uint64_t bitoffset;
int j, numops = 0, changes = 0, dirty = 0; int j, numops = 0, changes = 0;
size_t strOldSize, strGrowSize = 0;
struct bitfieldOp *ops = NULL; /* Array of ops to execute at end. */ struct bitfieldOp *ops = NULL; /* Array of ops to execute at end. */
int owtype = BFOVERFLOW_WRAP; /* Overflow type. */ int owtype = BFOVERFLOW_WRAP; /* Overflow type. */
int readonly = 1; int readonly = 1;
@ -1159,7 +1173,7 @@ void bitfieldGeneric(client *c, int flags) {
/* Lookup by making room up to the farthest bit reached by /* Lookup by making room up to the farthest bit reached by
* this operation. */ * this operation. */
if ((o = lookupStringForBitCommand(c, if ((o = lookupStringForBitCommand(c,
highest_write_offset,&dirty)) == NULL) { highest_write_offset,&strOldSize,&strGrowSize)) == NULL) {
zfree(ops); zfree(ops);
return; return;
} }
@ -1209,7 +1223,7 @@ void bitfieldGeneric(client *c, int flags) {
setSignedBitfield(o->ptr,thisop->offset, setSignedBitfield(o->ptr,thisop->offset,
thisop->bits,newval); thisop->bits,newval);
if (dirty || (oldval != newval)) if (strGrowSize || (oldval != newval))
changes++; changes++;
} else { } else {
addReplyNull(c); addReplyNull(c);
@ -1243,7 +1257,7 @@ void bitfieldGeneric(client *c, int flags) {
setUnsignedBitfield(o->ptr,thisop->offset, setUnsignedBitfield(o->ptr,thisop->offset,
thisop->bits,newval); thisop->bits,newval);
if (dirty || (oldval != newval)) if (strGrowSize || (oldval != newval))
changes++; changes++;
} else { } else {
addReplyNull(c); addReplyNull(c);
@ -1286,6 +1300,14 @@ void bitfieldGeneric(client *c, int flags) {
} }
if (changes) { if (changes) {
/* If this is not a new key (old size not 0) and size changed, then
* update the keysizes histogram. Otherwise, the histogram already
* updated in lookupStringForBitCommand() by calling dbAdd(). */
if ((strOldSize > 0) && (strGrowSize != 0))
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_STRING,
strOldSize, strOldSize + strGrowSize);
signalModifiedKey(c,c->db,c->argv[1]); signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id);
server.dirty += changes; server.dirty += changes;

View File

@ -533,7 +533,7 @@ CallReply *callReplyCreateError(sds reply, void *private_data) {
sdsfree(reply); sdsfree(reply);
} }
list *deferred_error_list = listCreate(); list *deferred_error_list = listCreate();
listSetFreeMethod(deferred_error_list, (void (*)(void*))sdsfree); listSetFreeMethod(deferred_error_list, sdsfreegeneric);
listAddNodeTail(deferred_error_list, sdsnew(err_buff)); listAddNodeTail(deferred_error_list, sdsnew(err_buff));
return callReplyCreate(err_buff, deferred_error_list, private_data); return callReplyCreate(err_buff, deferred_error_list, private_data);
} }

View File

@ -317,7 +317,7 @@ migrateCachedSocket* migrateGetSocket(client *c, robj *host, robj *port, long ti
} }
/* Create the connection */ /* Create the connection */
conn = connCreate(connTypeOfCluster()); conn = connCreate(server.el, connTypeOfCluster());
if (connBlockingConnect(conn, host->ptr, atoi(port->ptr), timeout) if (connBlockingConnect(conn, host->ptr, atoi(port->ptr), timeout)
!= C_OK) { != C_OK) {
addReplyError(c,"-IOERR error or timeout connecting to the client"); addReplyError(c,"-IOERR error or timeout connecting to the client");

View File

@ -1262,7 +1262,7 @@ void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
return; return;
} }
connection *conn = connCreateAccepted(connTypeOfCluster(), cfd, &require_auth); connection *conn = connCreateAccepted(server.el, connTypeOfCluster(), cfd, &require_auth);
/* Make sure connection is not in an error state */ /* Make sure connection is not in an error state */
if (connGetState(conn) != CONN_STATE_ACCEPTING) { if (connGetState(conn) != CONN_STATE_ACCEPTING) {
@ -4583,7 +4583,7 @@ static int clusterNodeCronHandleReconnect(clusterNode *node, mstime_t handshake_
if (node->link == NULL) { if (node->link == NULL) {
clusterLink *link = createClusterLink(node); clusterLink *link = createClusterLink(node);
link->conn = connCreate(connTypeOfCluster()); link->conn = connCreate(server.el, connTypeOfCluster());
connSetPrivateData(link->conn, link); connSetPrivateData(link->conn, link);
if (connConnect(link->conn, node->ip, node->cport, server.bind_source_addr, if (connConnect(link->conn, node->ip, node->cport, server.bind_source_addr,
clusterLinkConnectHandler) == C_ERR) { clusterLinkConnectHandler) == C_ERR) {

View File

@ -1239,6 +1239,9 @@ commandHistory CLIENT_LIST_History[] = {
{"6.2.0","Added `argv-mem`, `tot-mem`, `laddr` and `redir` fields and the optional `ID` filter."}, {"6.2.0","Added `argv-mem`, `tot-mem`, `laddr` and `redir` fields and the optional `ID` filter."},
{"7.0.0","Added `resp`, `multi-mem`, `rbs` and `rbp` fields."}, {"7.0.0","Added `resp`, `multi-mem`, `rbs` and `rbp` fields."},
{"7.0.3","Added `ssub` field."}, {"7.0.3","Added `ssub` field."},
{"7.2.0","Added `lib-name` and `lib-ver` fields."},
{"7.4.0","Added `watch` field."},
{"8.0.0","Added `io-thread` field."},
}; };
#endif #endif
@ -1546,7 +1549,7 @@ struct COMMAND_STRUCT CLIENT_Subcommands[] = {
{MAKE_CMD("id","Returns the unique client ID of the connection.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_ID_History,0,CLIENT_ID_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_ID_Keyspecs,0,NULL,0)}, {MAKE_CMD("id","Returns the unique client ID of the connection.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_ID_History,0,CLIENT_ID_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_ID_Keyspecs,0,NULL,0)},
{MAKE_CMD("info","Returns information about the connection.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_INFO_History,0,CLIENT_INFO_Tips,1,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_INFO_Keyspecs,0,NULL,0)}, {MAKE_CMD("info","Returns information about the connection.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_INFO_History,0,CLIENT_INFO_Tips,1,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_INFO_Keyspecs,0,NULL,0)},
{MAKE_CMD("kill","Terminates open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_KILL_History,6,CLIENT_KILL_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_KILL_Keyspecs,0,NULL,1),.args=CLIENT_KILL_Args}, {MAKE_CMD("kill","Terminates open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_KILL_History,6,CLIENT_KILL_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_KILL_Keyspecs,0,NULL,1),.args=CLIENT_KILL_Args},
{MAKE_CMD("list","Lists open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_LIST_History,6,CLIENT_LIST_Tips,1,clientCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_LIST_Keyspecs,0,NULL,2),.args=CLIENT_LIST_Args}, {MAKE_CMD("list","Lists open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_LIST_History,9,CLIENT_LIST_Tips,1,clientCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_LIST_Keyspecs,0,NULL,2),.args=CLIENT_LIST_Args},
{MAKE_CMD("no-evict","Sets the client eviction mode of the connection.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_EVICT_History,0,CLIENT_NO_EVICT_Tips,0,clientCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_NO_EVICT_Keyspecs,0,NULL,1),.args=CLIENT_NO_EVICT_Args}, {MAKE_CMD("no-evict","Sets the client eviction mode of the connection.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_EVICT_History,0,CLIENT_NO_EVICT_Tips,0,clientCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_NO_EVICT_Keyspecs,0,NULL,1),.args=CLIENT_NO_EVICT_Args},
{MAKE_CMD("no-touch","Controls whether commands sent by the client affect the LRU/LFU of accessed keys.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_TOUCH_History,0,CLIENT_NO_TOUCH_Tips,0,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,CLIENT_NO_TOUCH_Keyspecs,0,NULL,1),.args=CLIENT_NO_TOUCH_Args}, {MAKE_CMD("no-touch","Controls whether commands sent by the client affect the LRU/LFU of accessed keys.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_TOUCH_History,0,CLIENT_NO_TOUCH_Tips,0,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,CLIENT_NO_TOUCH_Keyspecs,0,NULL,1),.args=CLIENT_NO_TOUCH_Args},
{MAKE_CMD("pause","Suspends commands processing.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_PAUSE_History,1,CLIENT_PAUSE_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_PAUSE_Keyspecs,0,NULL,2),.args=CLIENT_PAUSE_Args}, {MAKE_CMD("pause","Suspends commands processing.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_PAUSE_History,1,CLIENT_PAUSE_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_PAUSE_Keyspecs,0,NULL,2),.args=CLIENT_PAUSE_Args},

View File

@ -31,6 +31,18 @@
[ [
"7.0.3", "7.0.3",
"Added `ssub` field." "Added `ssub` field."
],
[
"7.2.0",
"Added `lib-name` and `lib-ver` fields."
],
[
"7.4.0",
"Added `watch` field."
],
[
"8.0.0",
"Added `io-thread` field."
] ]
], ],
"command_flags": [ "command_flags": [

View File

@ -44,6 +44,9 @@
"lua.caches": { "lua.caches": {
"type": "integer" "type": "integer"
}, },
"script.VMs": {
"type": "integer"
},
"functions.caches": { "functions.caches": {
"type": "integer" "type": "integer"
}, },

View File

@ -3,6 +3,9 @@
* Copyright (c) 2009-Present, Redis Ltd. * Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved. * All rights reserved.
* *
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0 * Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1). * (RSALv2) or the Server Side Public License v1 (SSPLv1).
* *
@ -268,7 +271,7 @@ dict *configs = NULL; /* Runtime config values */
/* Lookup a config by the provided sds string name, or return NULL /* Lookup a config by the provided sds string name, or return NULL
* if the config does not exist */ * if the config does not exist */
static standardConfig *lookupConfig(sds name) { static standardConfig *lookupConfig(const sds name) {
dictEntry *de = dictFind(configs, name); dictEntry *de = dictFind(configs, name);
return de ? dictGetVal(de) : NULL; return de ? dictGetVal(de) : NULL;
} }
@ -430,6 +433,7 @@ void loadServerConfigFromString(char *config) {
{"list-max-ziplist-entries", 2, 2}, {"list-max-ziplist-entries", 2, 2},
{"list-max-ziplist-value", 2, 2}, {"list-max-ziplist-value", 2, 2},
{"lua-replicate-commands", 2, 2}, {"lua-replicate-commands", 2, 2},
{"io-threads-do-reads", 2, 2},
{NULL, 0}, {NULL, 0},
}; };
char buf[1024]; char buf[1024];
@ -552,16 +556,6 @@ void loadServerConfigFromString(char *config) {
} }
} else if (!strcasecmp(argv[0],"loadmodule") && argc >= 2) { } else if (!strcasecmp(argv[0],"loadmodule") && argc >= 2) {
queueLoadModule(argv[1],&argv[2],argc-2); queueLoadModule(argv[1],&argv[2],argc-2);
} else if (strchr(argv[0], '.')) {
if (argc < 2) {
err = "Module config specified without value";
goto loaderr;
}
sds name = sdsdup(argv[0]);
sds val = sdsdup(argv[1]);
for (int i = 2; i < argc; i++)
val = sdscatfmt(val, " %S", argv[i]);
if (!dictReplace(server.module_configs_queue, name, val)) sdsfree(name);
} else if (!strcasecmp(argv[0],"sentinel")) { } else if (!strcasecmp(argv[0],"sentinel")) {
/* argc == 1 is handled by main() as we need to enter the sentinel /* argc == 1 is handled by main() as we need to enter the sentinel
* mode ASAP. */ * mode ASAP. */
@ -573,7 +567,20 @@ void loadServerConfigFromString(char *config) {
queueSentinelConfig(argv+1,argc-1,linenum,lines[i]); queueSentinelConfig(argv+1,argc-1,linenum,lines[i]);
} }
} else { } else {
err = "Bad directive or wrong number of arguments"; goto loaderr; /* Collect all unknown configurations into `module_configs_queue`.
* These may include valid module configurations or invalid ones.
* They will be validated later by loadModuleConfigs() against the
* configurations declared by the loaded module(s). */
if (argc < 2) {
err = "Bad directive or wrong number of arguments";
goto loaderr;
}
sds name = sdsdup(argv[0]);
sds val = sdsdup(argv[1]);
for (int i = 2; i < argc; i++)
val = sdscatfmt(val, " %S", argv[i]);
if (!dictReplace(server.module_configs_queue, name, val)) sdsfree(name);
} }
sdsfreesplitres(argv,argc); sdsfreesplitres(argv,argc);
argv = NULL; argv = NULL;
@ -2547,11 +2554,10 @@ static int updateMaxclients(const char **err) {
*err = msg; *err = msg;
return 0; return 0;
} }
if ((unsigned int) aeGetSetSize(server.el) < size_t newsize = server.maxclients + CONFIG_FDSET_INCR;
server.maxclients + CONFIG_FDSET_INCR) if ((unsigned int) aeGetSetSize(server.el) < newsize) {
{ if (aeResizeSetSize(server.el, newsize) == AE_ERR ||
if (aeResizeSetSize(server.el, resizeAllIOThreadsEventLoops(newsize) == AE_ERR)
server.maxclients + CONFIG_FDSET_INCR) == AE_ERR)
{ {
*err = "The event loop API used by Redis is not able to handle the specified number of clients"; *err = "The event loop API used by Redis is not able to handle the specified number of clients";
return 0; return 0;
@ -3032,6 +3038,7 @@ static int applyClientMaxMemoryUsage(const char **err) {
if (server.maxmemory_clients != 0) if (server.maxmemory_clients != 0)
initServerClientMemUsageBuckets(); initServerClientMemUsageBuckets();
pauseAllIOThreads();
/* When client eviction is enabled update memory buckets for all clients. /* When client eviction is enabled update memory buckets for all clients.
* When disabled, clear that data structure. */ * When disabled, clear that data structure. */
listRewind(server.clients, &li); listRewind(server.clients, &li);
@ -3045,6 +3052,7 @@ static int applyClientMaxMemoryUsage(const char **err) {
updateClientMemUsageAndBucket(c); updateClientMemUsageAndBucket(c);
} }
} }
resumeAllIOThreads();
if (server.maxmemory_clients == 0) if (server.maxmemory_clients == 0)
freeServerClientMemUsageBuckets(); freeServerClientMemUsageBuckets();
@ -3071,6 +3079,7 @@ standardConfig static_configs[] = {
createBoolConfig("lazyfree-lazy-user-flush", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.lazyfree_lazy_user_flush , 0, NULL, NULL), createBoolConfig("lazyfree-lazy-user-flush", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.lazyfree_lazy_user_flush , 0, NULL, NULL),
createBoolConfig("repl-disable-tcp-nodelay", NULL, MODIFIABLE_CONFIG, server.repl_disable_tcp_nodelay, 0, NULL, NULL), createBoolConfig("repl-disable-tcp-nodelay", NULL, MODIFIABLE_CONFIG, server.repl_disable_tcp_nodelay, 0, NULL, NULL),
createBoolConfig("repl-diskless-sync", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.repl_diskless_sync, 1, NULL, NULL), createBoolConfig("repl-diskless-sync", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.repl_diskless_sync, 1, NULL, NULL),
createBoolConfig("repl-rdb-channel", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, server.repl_rdb_channel, 1, NULL, NULL),
createBoolConfig("aof-rewrite-incremental-fsync", NULL, MODIFIABLE_CONFIG, server.aof_rewrite_incremental_fsync, 1, NULL, NULL), createBoolConfig("aof-rewrite-incremental-fsync", NULL, MODIFIABLE_CONFIG, server.aof_rewrite_incremental_fsync, 1, NULL, NULL),
createBoolConfig("no-appendfsync-on-rewrite", NULL, MODIFIABLE_CONFIG, server.aof_no_fsync_on_rewrite, 0, NULL, NULL), createBoolConfig("no-appendfsync-on-rewrite", NULL, MODIFIABLE_CONFIG, server.aof_no_fsync_on_rewrite, 0, NULL, NULL),
createBoolConfig("cluster-require-full-coverage", NULL, MODIFIABLE_CONFIG, server.cluster_require_full_coverage, 1, NULL, NULL), createBoolConfig("cluster-require-full-coverage", NULL, MODIFIABLE_CONFIG, server.cluster_require_full_coverage, 1, NULL, NULL),
@ -3213,6 +3222,7 @@ standardConfig static_configs[] = {
createLongLongConfig("proto-max-bulk-len", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, 1024*1024, LONG_MAX, server.proto_max_bulk_len, 512ll*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Bulk request max size */ createLongLongConfig("proto-max-bulk-len", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, 1024*1024, LONG_MAX, server.proto_max_bulk_len, 512ll*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Bulk request max size */
createLongLongConfig("stream-node-max-entries", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.stream_node_max_entries, 100, INTEGER_CONFIG, NULL, NULL), createLongLongConfig("stream-node-max-entries", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.stream_node_max_entries, 100, INTEGER_CONFIG, NULL, NULL),
createLongLongConfig("repl-backlog-size", NULL, MODIFIABLE_CONFIG, 1, LLONG_MAX, server.repl_backlog_size, 1024*1024, MEMORY_CONFIG, NULL, updateReplBacklogSize), /* Default: 1mb */ createLongLongConfig("repl-backlog-size", NULL, MODIFIABLE_CONFIG, 1, LLONG_MAX, server.repl_backlog_size, 1024*1024, MEMORY_CONFIG, NULL, updateReplBacklogSize), /* Default: 1mb */
createLongLongConfig("replica-full-sync-buffer-limit", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.repl_full_sync_buffer_limit, 0, MEMORY_CONFIG, NULL, NULL), /* Default: Inherits 'client-output-buffer-limit <replica>' */
/* Unsigned Long Long configs */ /* Unsigned Long Long configs */
createULongLongConfig("maxmemory", NULL, MODIFIABLE_CONFIG, 0, ULLONG_MAX, server.maxmemory, 0, MEMORY_CONFIG, NULL, updateMaxmemory), createULongLongConfig("maxmemory", NULL, MODIFIABLE_CONFIG, 0, ULLONG_MAX, server.maxmemory, 0, MEMORY_CONFIG, NULL, updateMaxmemory),
@ -3312,16 +3322,34 @@ void removeConfig(sds name) {
standardConfig *config = lookupConfig(name); standardConfig *config = lookupConfig(name);
if (!config) return; if (!config) return;
if (config->flags & MODULE_CONFIG) { if (config->flags & MODULE_CONFIG) {
sdsfree((sds) config->name); sdsfree((sds) config->name);
if (config->type == ENUM_CONFIG) { sdsfree((sds) config->alias);
configEnum *enumNode = config->data.enumd.enum_value;
while(enumNode->name != NULL) { switch (config->type) {
zfree(enumNode->name); case BOOL_CONFIG:
enumNode++; break;
} case NUMERIC_CONFIG:
zfree(config->data.enumd.enum_value); break;
} else if (config->type == SDS_CONFIG) { case SDS_CONFIG:
if (config->data.sds.default_value) sdsfree((sds)config->data.sds.default_value); if (config->data.sds.default_value)
sdsfree((sds)config->data.sds.default_value);
break;
case ENUM_CONFIG:
{
configEnum *enumNode = config->data.enumd.enum_value;
while(enumNode->name != NULL) {
zfree(enumNode->name);
enumNode++;
}
zfree(config->data.enumd.enum_value);
}
break;
case SPECIAL_CONFIG: /* Not used by modules */
case STRING_CONFIG: /* Not used by modules */
default:
serverAssert(0);
break;
} }
} }
dictDelete(configs, name); dictDelete(configs, name);
@ -3332,40 +3360,77 @@ void removeConfig(sds name) {
*----------------------------------------------------------------------------*/ *----------------------------------------------------------------------------*/
/* Create a bool/string/enum/numeric standardConfig for a module config in the configs dictionary */ /* Create a bool/string/enum/numeric standardConfig for a module config in the configs dictionary */
void addModuleBoolConfig(const char *module_name, const char *name, int flags, void *privdata, int default_val) {
sds config_name = sdscatfmt(sdsempty(), "%s.%s", module_name, name); /* On removeConfig(), name and alias will be sdsfree() */
void addModuleBoolConfig(sds name, sds alias, int flags, void *privdata, int default_val) {
int config_dummy_address; int config_dummy_address;
standardConfig module_config = createBoolConfig(config_name, NULL, flags | MODULE_CONFIG, config_dummy_address, default_val, NULL, NULL); standardConfig sc = createBoolConfig(name, alias, flags | MODULE_CONFIG, config_dummy_address, default_val, NULL, NULL);
module_config.data.yesno.config = NULL; sc.data.yesno.config = NULL;
module_config.privdata = privdata; sc.privdata = privdata;
registerConfigValue(config_name, &module_config, 0); registerConfigValue(name, &sc, 0);
/* If alias available, deep copy standardConfig and register again */
if (alias) {
sc.name = sdsdup(name);
sc.alias = sdsdup(alias);
registerConfigValue(sc.alias, &sc, 1);
}
} }
void addModuleStringConfig(const char *module_name, const char *name, int flags, void *privdata, sds default_val) { /* On removeConfig(), name, default_val, and alias will be sdsfree() */
sds config_name = sdscatfmt(sdsempty(), "%s.%s", module_name, name); void addModuleStringConfig(sds name, sds alias, int flags, void *privdata, sds default_val) {
sds config_dummy_address; sds config_dummy_address;
standardConfig module_config = createSDSConfig(config_name, NULL, flags | MODULE_CONFIG, 0, config_dummy_address, default_val, NULL, NULL); standardConfig sc = createSDSConfig(name, alias, flags | MODULE_CONFIG, 0, config_dummy_address, default_val, NULL, NULL);
module_config.data.sds.config = NULL; sc.data.sds.config = NULL;
module_config.privdata = privdata; sc.privdata = privdata;
registerConfigValue(config_name, &module_config, 0); registerConfigValue(name, &sc, 0); /* memcpy sc */
/* If alias available, deep copy standardConfig and register again */
if (alias) {
sc.name = sdsdup(name);
sc.alias = sdsdup(alias);
if (default_val) sc.data.sds.default_value = sdsdup(default_val);
registerConfigValue(sc.alias, &sc, 1);
}
} }
void addModuleEnumConfig(const char *module_name, const char *name, int flags, void *privdata, int default_val, configEnum *enum_vals) { /* On removeConfig(), name, default_val, alias and enum_vals will be freed */
sds config_name = sdscatfmt(sdsempty(), "%s.%s", module_name, name); void addModuleEnumConfig(sds name, sds alias, int flags, void *privdata, int default_val, configEnum *enum_vals, int num_enum_vals) {
int config_dummy_address; int config_dummy_address;
standardConfig module_config = createEnumConfig(config_name, NULL, flags | MODULE_CONFIG, enum_vals, config_dummy_address, default_val, NULL, NULL); standardConfig sc = createEnumConfig(name, alias, flags | MODULE_CONFIG, enum_vals, config_dummy_address, default_val, NULL, NULL);
module_config.data.enumd.config = NULL; sc.data.enumd.config = NULL;
module_config.privdata = privdata; sc.privdata = privdata;
registerConfigValue(config_name, &module_config, 0); registerConfigValue(name, &sc, 0);
/* If alias available, deep copy standardConfig and register again */
if (alias) {
sc.name = sdsdup(name);
sc.alias = sdsdup(alias);
sc.data.enumd.enum_value = zmalloc((num_enum_vals + 1) * sizeof(configEnum));
for (int i = 0; i < num_enum_vals; i++) {
sc.data.enumd.enum_value[i].name = zstrdup(enum_vals[i].name);
sc.data.enumd.enum_value[i].val = enum_vals[i].val;
}
sc.data.enumd.enum_value[num_enum_vals].name = NULL;
sc.data.enumd.enum_value[num_enum_vals].val = 0;
registerConfigValue(sc.alias, &sc, 1);
}
} }
void addModuleNumericConfig(const char *module_name, const char *name, int flags, void *privdata, long long default_val, int conf_flags, long long lower, long long upper) { /* On removeConfig(), it will free name, and alias if it is not NULL */
sds config_name = sdscatfmt(sdsempty(), "%s.%s", module_name, name); void addModuleNumericConfig(sds name, sds alias, int flags, void *privdata, long long default_val, int conf_flags, long long lower, long long upper) {
long long config_dummy_address; long long config_dummy_address;
standardConfig module_config = createLongLongConfig(config_name, NULL, flags | MODULE_CONFIG, lower, upper, config_dummy_address, default_val, conf_flags, NULL, NULL); standardConfig sc = createLongLongConfig(name, alias, flags | MODULE_CONFIG, lower, upper, config_dummy_address, default_val, conf_flags, NULL, NULL);
module_config.data.numeric.config.ll = NULL; sc.data.numeric.config.ll = NULL;
module_config.privdata = privdata; sc.privdata = privdata;
registerConfigValue(config_name, &module_config, 0); registerConfigValue(name, &sc, 0);
/* If alias available, deep copy standardConfig and register again */
if (alias) {
sc.name = sdsdup(name);
sc.alias = sdsdup(alias);
registerConfigValue(sc.alias, &sc, 1);
}
} }
/*----------------------------------------------------------------------------- /*-----------------------------------------------------------------------------
@ -3418,3 +3483,7 @@ void configRewriteCommand(client *c) {
addReply(c,shared.ok); addReply(c,shared.ok);
} }
} }
int configExists(const sds name) {
return lookupConfig(name) != NULL;
}

View File

@ -47,6 +47,7 @@
#define HAVE_PROC_SMAPS 1 #define HAVE_PROC_SMAPS 1
#define HAVE_PROC_SOMAXCONN 1 #define HAVE_PROC_SOMAXCONN 1
#define HAVE_PROC_OOM_SCORE_ADJ 1 #define HAVE_PROC_OOM_SCORE_ADJ 1
#define HAVE_EVENT_FD 1
#endif #endif
/* Test for task_info() */ /* Test for task_info() */
@ -101,6 +102,25 @@
#endif #endif
#endif #endif
/* Test for __builtin_prefetch()
* Supported in LLVM since 2.9: https://releases.llvm.org/2.9/docs/ReleaseNotes.html
* Supported in GCC since 3.1 but we use 4.9 given it's too old: https://gcc.gnu.org/gcc-3.1/changes.html. */
#if defined(__clang__) && (__clang_major__ > 2 || (__clang_major__ == 2 && __clang_minor__ >= 9))
#define HAS_BUILTIN_PREFETCH 1
#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))
#define HAS_BUILTIN_PREFETCH 1
#else
#define HAS_BUILTIN_PREFETCH 0
#endif
#if HAS_BUILTIN_PREFETCH
#define redis_prefetch_read(addr) __builtin_prefetch(addr, 0, 3) /* Read with high locality */
#define redis_prefetch_write(addr) __builtin_prefetch(addr, 1, 3) /* Write with high locality */
#else
#define redis_prefetch_read(addr) ((void)(addr)) /* No-op if unsupported */
#define redis_prefetch_write(addr) ((void)(addr)) /* No-op if unsupported */
#endif
/* Define redis_fsync to fdatasync() in Linux and fsync() for all the rest */ /* Define redis_fsync to fdatasync() in Linux and fsync() for all the rest */
#if defined(__linux__) #if defined(__linux__)
#define redis_fsync(fd) fdatasync(fd) #define redis_fsync(fd) fdatasync(fd)
@ -318,4 +338,17 @@ void setcpuaffinity(const char *cpulist);
#define ATTRIBUTE_TARGET_POPCNT #define ATTRIBUTE_TARGET_POPCNT
#endif #endif
/* Check if we can compile AVX2 code */
#if defined (__x86_64__) && ((defined(__GNUC__) && __GNUC__ >= 5) || (defined(__clang__) && __clang_major__ >= 4))
#if defined(__has_attribute) && __has_attribute(target)
#define HAVE_AVX2
#endif
#endif
#if defined (HAVE_AVX2)
#define ATTRIBUTE_TARGET_AVX2 __attribute__((target("avx2")))
#else
#define ATTRIBUTE_TARGET_AVX2
#endif
#endif #endif

View File

@ -156,14 +156,14 @@ void connTypeCleanupAll(void) {
} }
/* walk all the connection types until has pending data */ /* walk all the connection types until has pending data */
int connTypeHasPendingData(void) { int connTypeHasPendingData(struct aeEventLoop *el) {
ConnectionType *ct; ConnectionType *ct;
int type; int type;
int ret = 0; int ret = 0;
for (type = 0; type < CONN_TYPE_MAX; type++) { for (type = 0; type < CONN_TYPE_MAX; type++) {
ct = connTypes[type]; ct = connTypes[type];
if (ct && ct->has_pending_data && (ret = ct->has_pending_data())) { if (ct && ct->has_pending_data && (ret = ct->has_pending_data(el))) {
return ret; return ret;
} }
} }
@ -172,7 +172,7 @@ int connTypeHasPendingData(void) {
} }
/* walk all the connection types and process pending data for each connection type */ /* walk all the connection types and process pending data for each connection type */
int connTypeProcessPendingData(void) { int connTypeProcessPendingData(struct aeEventLoop *el) {
ConnectionType *ct; ConnectionType *ct;
int type; int type;
int ret = 0; int ret = 0;
@ -180,7 +180,7 @@ int connTypeProcessPendingData(void) {
for (type = 0; type < CONN_TYPE_MAX; type++) { for (type = 0; type < CONN_TYPE_MAX; type++) {
ct = connTypes[type]; ct = connTypes[type];
if (ct && ct->process_pending_data) { if (ct && ct->process_pending_data) {
ret += ct->process_pending_data(); ret += ct->process_pending_data(el);
} }
} }

View File

@ -60,8 +60,8 @@ typedef struct ConnectionType {
int (*listen)(connListener *listener); int (*listen)(connListener *listener);
/* create/shutdown/close connection */ /* create/shutdown/close connection */
connection* (*conn_create)(void); connection* (*conn_create)(struct aeEventLoop *el);
connection* (*conn_create_accepted)(int fd, void *priv); connection* (*conn_create_accepted)(struct aeEventLoop *el, int fd, void *priv);
void (*shutdown)(struct connection *conn); void (*shutdown)(struct connection *conn);
void (*close)(struct connection *conn); void (*close)(struct connection *conn);
@ -81,9 +81,13 @@ typedef struct ConnectionType {
ssize_t (*sync_read)(struct connection *conn, char *ptr, ssize_t size, long long timeout); ssize_t (*sync_read)(struct connection *conn, char *ptr, ssize_t size, long long timeout);
ssize_t (*sync_readline)(struct connection *conn, char *ptr, ssize_t size, long long timeout); ssize_t (*sync_readline)(struct connection *conn, char *ptr, ssize_t size, long long timeout);
/* event loop */
void (*unbind_event_loop)(struct connection *conn);
int (*rebind_event_loop)(struct connection *conn, aeEventLoop *el);
/* pending data */ /* pending data */
int (*has_pending_data)(void); int (*has_pending_data)(struct aeEventLoop *el);
int (*process_pending_data)(void); int (*process_pending_data)(struct aeEventLoop *el);
/* TLS specified methods */ /* TLS specified methods */
sds (*get_peer_cert)(struct connection *conn); sds (*get_peer_cert)(struct connection *conn);
@ -98,6 +102,7 @@ struct connection {
short int refs; short int refs;
unsigned short int iovcnt; unsigned short int iovcnt;
void *private_data; void *private_data;
struct aeEventLoop *el;
ConnectionCallbackFunc conn_handler; ConnectionCallbackFunc conn_handler;
ConnectionCallbackFunc write_handler; ConnectionCallbackFunc write_handler;
ConnectionCallbackFunc read_handler; ConnectionCallbackFunc read_handler;
@ -319,6 +324,28 @@ static inline int connHasReadHandler(connection *conn) {
return conn->read_handler != NULL; return conn->read_handler != NULL;
} }
/* Returns true if the connection is bound to an event loop */
static inline int connHasEventLoop(connection *conn) {
return conn->el != NULL;
}
/* Unbind the current event loop from the connection, so that it can be
* rebind to a different event loop in the future. */
static inline void connUnbindEventLoop(connection *conn) {
if (conn->el == NULL) return;
connSetReadHandler(conn, NULL);
connSetWriteHandler(conn, NULL);
if (conn->type->unbind_event_loop)
conn->type->unbind_event_loop(conn);
conn->el = NULL;
}
/* Rebind the connection to another event loop, read/write handlers must not
* be installed in the current event loop */
static inline int connRebindEventLoop(connection *conn, aeEventLoop *el) {
return conn->type->rebind_event_loop(conn, el);
}
/* Associate a private data pointer with the connection */ /* Associate a private data pointer with the connection */
static inline void connSetPrivateData(connection *conn, void *data) { static inline void connSetPrivateData(connection *conn, void *data) {
conn->private_data = data; conn->private_data = data;
@ -379,14 +406,14 @@ ConnectionType *connectionTypeUnix(void);
int connectionIndexByType(const char *typename); int connectionIndexByType(const char *typename);
/* Create a connection of specified type */ /* Create a connection of specified type */
static inline connection *connCreate(ConnectionType *ct) { static inline connection *connCreate(struct aeEventLoop *el, ConnectionType *ct) {
return ct->conn_create(); return ct->conn_create(el);
} }
/* Create an accepted connection of specified type. /* Create an accepted connection of specified type.
* priv is connection type specified argument */ * priv is connection type specified argument */
static inline connection *connCreateAccepted(ConnectionType *ct, int fd, void *priv) { static inline connection *connCreateAccepted(struct aeEventLoop *el, ConnectionType *ct, int fd, void *priv) {
return ct->conn_create_accepted(fd, priv); return ct->conn_create_accepted(el, fd, priv);
} }
/* Configure a connection type. A typical case is to configure TLS. /* Configure a connection type. A typical case is to configure TLS.
@ -400,10 +427,10 @@ static inline int connTypeConfigure(ConnectionType *ct, void *priv, int reconfig
void connTypeCleanupAll(void); void connTypeCleanupAll(void);
/* Test all the connection type has pending data or not. */ /* Test all the connection type has pending data or not. */
int connTypeHasPendingData(void); int connTypeHasPendingData(struct aeEventLoop *el);
/* walk all the connection types and process pending data for each connection type */ /* walk all the connection types and process pending data for each connection type */
int connTypeProcessPendingData(void); int connTypeProcessPendingData(struct aeEventLoop *el);
/* Listen on an initialized listener */ /* Listen on an initialized listener */
static inline int connListen(connListener *listener) { static inline int connListen(connListener *listener) {

View File

@ -26,8 +26,11 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE. */ * POSSIBILITY OF SUCH DAMAGE. */
#include <stdlib.h>
#include "crc64.h" #include "crc64.h"
#include "crcspeed.h" #include "crcspeed.h"
#include "redisassert.h"
#include "testhelp.h"
static uint64_t crc64_table[8][256] = {{0}}; static uint64_t crc64_table[8][256] = {{0}};
#define POLY UINT64_C(0xad93d23594c935a9) #define POLY UINT64_C(0xad93d23594c935a9)
@ -67,14 +70,33 @@ static uint64_t crc64_table[8][256] = {{0}};
* \return The reflected data. * \return The reflected data.
*****************************************************************************/ *****************************************************************************/
static inline uint_fast64_t crc_reflect(uint_fast64_t data, size_t data_len) { static inline uint_fast64_t crc_reflect(uint_fast64_t data, size_t data_len) {
uint_fast64_t ret = data & 0x01; /* only ever called for data_len == 64 in this codebase
*
* Borrowed from bit twiddling hacks, original in the public domain.
* https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
* Extended to 64 bits, and added byteswap for final 3 steps.
* 16-30x 64-bit operations, no comparisons (16 for native byteswap, 30 for pure C)
*/
for (size_t i = 1; i < data_len; i++) { assert(data_len <= 64);
data >>= 1; /* swap odd and even bits */
ret = (ret << 1) | (data & 0x01); data = ((data >> 1) & 0x5555555555555555ULL) | ((data & 0x5555555555555555ULL) << 1);
} /* swap consecutive pairs */
data = ((data >> 2) & 0x3333333333333333ULL) | ((data & 0x3333333333333333ULL) << 2);
return ret; /* swap nibbles ... */
data = ((data >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((data & 0x0F0F0F0F0F0F0F0FULL) << 4);
#if defined(__GNUC__) || defined(__clang__)
data = __builtin_bswap64(data);
#else
/* swap bytes */
data = ((data >> 8) & 0x00FF00FF00FF00FFULL) | ((data & 0x00FF00FF00FF00FFULL) << 8);
/* swap 2-byte long pairs */
data = ( data >> 16 & 0xFFFF0000FFFFULL) | ((data & 0xFFFF0000FFFFULL) << 16);
/* swap 4-byte quads */
data = ( data >> 32 & 0xFFFFFFFFULL) | ((data & 0xFFFFFFFFULL) << 32);
#endif
/* adjust for non-64-bit reversals */
return data >> (64 - data_len);
} }
/** /**
@ -126,36 +148,221 @@ uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) {
#ifdef REDIS_TEST #ifdef REDIS_TEST
#include <stdio.h> #include <stdio.h>
static void genBenchmarkRandomData(char *data, int count);
static int bench_crc64(unsigned char *data, uint64_t size, long long passes, uint64_t check, char *name, int csv);
static void bench_combine(char *label, uint64_t size, uint64_t expect, int csv);
long long _ustime(void);
#include <inttypes.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include <unistd.h>
#include "zmalloc.h"
#include "crccombine.h"
long long _ustime(void) {
struct timeval tv;
long long ust;
gettimeofday(&tv, NULL);
ust = ((long long)tv.tv_sec)*1000000;
ust += tv.tv_usec;
return ust;
}
static int bench_crc64(unsigned char *data, uint64_t size, long long passes, uint64_t check, char *name, int csv) {
uint64_t min = size, hash;
long long original_start = _ustime(), original_end;
for (long long i=passes; i > 0; i--) {
hash = crc64(0, data, size);
}
original_end = _ustime();
min = (original_end - original_start) * 1000 / passes;
/* approximate nanoseconds without nstime */
if (csv) {
printf("%s,%" PRIu64 ",%" PRIu64 ",%d\n",
name, size, (1000 * size) / min, hash == check);
} else {
printf("test size=%" PRIu64 " algorithm=%s %" PRIu64 " M/sec matches=%d\n",
size, name, (1000 * size) / min, hash == check);
}
return hash != check;
}
const uint64_t BENCH_RPOLY = UINT64_C(0x95ac9329ac4bc9b5);
static void bench_combine(char *label, uint64_t size, uint64_t expect, int csv) {
uint64_t min = size, start = expect, thash = expect ^ (expect >> 17);
long long original_start = _ustime(), original_end;
for (int i=0; i < 1000; i++) {
crc64_combine(thash, start, size, BENCH_RPOLY, 64);
}
original_end = _ustime();
/* ran 1000 times, want ns per, counted us per 1000 ... */
min = original_end - original_start;
if (csv) {
printf("%s,%" PRIu64 ",%" PRIu64 "\n", label, size, min);
} else {
printf("%s size=%" PRIu64 " in %" PRIu64 " nsec\n", label, size, min);
}
}
static void genBenchmarkRandomData(char *data, int count) {
static uint32_t state = 1234;
int i = 0;
while (count--) {
state = (state*1103515245+12345);
data[i++] = '0'+((state>>16)&63);
}
}
#define UNUSED(x) (void)(x) #define UNUSED(x) (void)(x)
int crc64Test(int argc, char *argv[], int flags) { int crc64Test(int argc, char *argv[], int flags) {
UNUSED(argc);
UNUSED(argv); uint64_t crc64_test_size = 0;
UNUSED(flags); int i, lastarg, csv = 0, loop = 0, combine = 0, testAll = 0;
crc64_init();
printf("[calcula]: e9c6d914c4b8d9ca == %016" PRIx64 "\n", again:
(uint64_t)_crc64(0, "123456789", 9)); if ((argc>=4) && (!strcmp(argv[3],"custom"))) {
printf("[64speed]: e9c6d914c4b8d9ca == %016" PRIx64 "\n", for (i = 4; i < argc; i++) {
(uint64_t)crc64(0, (unsigned char*)"123456789", 9)); lastarg = (i == (argc - 1));
char li[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed " if (!strcmp(argv[i], "--help")) {
"do eiusmod tempor incididunt ut labore et dolore magna " goto usage;
"aliqua. Ut enim ad minim veniam, quis nostrud exercitation " } else if (!strcmp(argv[i], "--csv")) {
"ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis " csv = 1;
"aute irure dolor in reprehenderit in voluptate velit esse " } else if (!strcmp(argv[i], "-l")) {
"cillum dolore eu fugiat nulla pariatur. Excepteur sint " loop = 1;
"occaecat cupidatat non proident, sunt in culpa qui officia " } else if (!strcmp(argv[i], "--crc")) {
"deserunt mollit anim id est laborum."; if (lastarg) goto invalid;
printf("[calcula]: c7794709e69683b3 == %016" PRIx64 "\n", crc64_test_size = atoll(argv[++i]);
(uint64_t)_crc64(0, li, sizeof(li))); } else if (!strcmp(argv[i], "--combine")) {
printf("[64speed]: c7794709e69683b3 == %016" PRIx64 "\n", combine = 1;
(uint64_t)crc64(0, (unsigned char*)li, sizeof(li))); } else {
invalid:
printf("Invalid option \"%s\" or option argument missing\n\n",
argv[i]);
usage:
printf(
"Usage: crc64 [OPTIONS]\n\n"
" --csv Output in CSV format\n"
" -l Loop. Run the tests forever\n"
" --crc <bytes> Benchmark crc64 faster options, using a buffer this big, and quit when done.\n"
" --combine Benchmark crc64 combine value ranges and timings.\n"
);
return 1;
}
}
} else {
crc64_test_size = 50000;
testAll = 1;
if (flags & REDIS_TEST_ACCURATE) crc64_test_size = 5000000;
}
if ((crc64_test_size == 0 && combine == 0) || testAll) {
crc64_init();
printf("[calcula]: e9c6d914c4b8d9ca == %016" PRIx64 "\n",
(uint64_t)_crc64(0, "123456789", 9));
printf("[64speed]: e9c6d914c4b8d9ca == %016" PRIx64 "\n",
(uint64_t)crc64(0, (unsigned char*)"123456789", 9));
char li[] = "Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed "
"do eiusmod tempor incididunt ut labore et dolore magna "
"aliqua. Ut enim ad minim veniam, quis nostrud exercitation "
"ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis "
"aute irure dolor in reprehenderit in voluptate velit esse "
"cillum dolore eu fugiat nulla pariatur. Excepteur sint "
"occaecat cupidatat non proident, sunt in culpa qui officia "
"deserunt mollit anim id est laborum.";
printf("[calcula]: c7794709e69683b3 == %016" PRIx64 "\n",
(uint64_t)_crc64(0, li, sizeof(li)));
printf("[64speed]: c7794709e69683b3 == %016" PRIx64 "\n",
(uint64_t)crc64(0, (unsigned char*)li, sizeof(li)));
if (!testAll) return 0;
}
int init_this_loop = 1;
long long init_start, init_end;
do {
unsigned char* data = NULL;
uint64_t passes = 0;
if (crc64_test_size) {
data = zmalloc(crc64_test_size);
genBenchmarkRandomData((char*)data, crc64_test_size);
/* We want to hash about 1 gig of data in total, looped, to get a good
* idea of our performance.
*/
passes = (UINT64_C(0x100000000) / crc64_test_size);
passes = passes >= 2 ? passes : 2;
passes = passes <= 1000 ? passes : 1000;
}
crc64_init();
/* warm up the cache */
set_crc64_cutoffs(crc64_test_size+1, crc64_test_size+1);
uint64_t expect = crc64(0, data, crc64_test_size);
if ((!combine || testAll) && crc64_test_size) {
if (csv && init_this_loop) printf("algorithm,buffer,performance,crc64_matches\n");
/* get the single-character version for single-byte Redis behavior */
set_crc64_cutoffs(0, crc64_test_size+1);
assert(!bench_crc64(data, crc64_test_size, passes, expect, "crc_1byte", csv));
set_crc64_cutoffs(crc64_test_size+1, crc64_test_size+1);
/* run with 8-byte "single" path, crcfaster */
assert(!(bench_crc64(data, crc64_test_size, passes, expect, "crcspeed", csv)));
/* run with dual 8-byte paths */
set_crc64_cutoffs(1, crc64_test_size+1);
assert(!(bench_crc64(data, crc64_test_size, passes, expect, "crcdual", csv)));
/* run with tri 8-byte paths */
set_crc64_cutoffs(1, 1);
assert(!(bench_crc64(data, crc64_test_size, passes, expect, "crctri", csv)));
/* Be free memory region, be free. */
zfree(data);
data = NULL;
}
uint64_t INIT_SIZE = UINT64_C(0xffffffffffffffff);
if (combine || testAll) {
if (init_this_loop) {
init_start = _ustime();
crc64_combine(
UINT64_C(0xdeadbeefdeadbeef),
UINT64_C(0xfeebdaedfeebdaed),
INIT_SIZE,
BENCH_RPOLY, 64);
init_end = _ustime();
init_end -= init_start;
init_end *= 1000;
if (csv) {
printf("operation,size,nanoseconds\n");
printf("init_64,%" PRIu64 ",%" PRIu64 "\n", INIT_SIZE, (uint64_t)init_end);
} else {
printf("init_64 size=%" PRIu64 " in %" PRIu64 " nsec\n", INIT_SIZE, (uint64_t)init_end);
}
/* use the hash itself as the size (unpredictable) */
bench_combine("hash_as_size_combine", crc64_test_size, expect, csv);
/* let's do something big (predictable, so fast) */
bench_combine("largest_combine", INIT_SIZE, expect, csv);
}
bench_combine("combine", crc64_test_size, expect, csv);
}
init_this_loop = 0;
/* step down by ~1.641 for a range of test sizes */
crc64_test_size -= (crc64_test_size >> 2) + (crc64_test_size >> 3) + (crc64_test_size >> 6);
} while (crc64_test_size > 3);
if (loop) goto again;
return 0; return 0;
} }
#endif #endif
#ifdef REDIS_TEST_MAIN
int main(int argc, char *argv[]) {
return crc64Test(argc, argv);
}
#endif

252
src/crccombine.c Normal file
View File

@ -0,0 +1,252 @@
#include <stdint.h>
#include <stdio.h>
#include <strings.h>
#if defined(__i386__) || defined(__X86_64__)
#include <immintrin.h>
#endif
#include "crccombine.h"
/* Copyright (C) 2013 Mark Adler
* Copyright (C) 2019-2024 Josiah Carlson
* Portions originally from: crc64.c Version 1.4 16 Dec 2013 Mark Adler
* Modifications by Josiah Carlson <josiah.carlson@gmail.com>
* - Added implementation variations with sample timings for gf_matrix_times*()
* - Most folks would be best using gf2_matrix_times_vec or
* gf2_matrix_times_vec2, unless some processor does AVX2 fast.
* - This is the implementation of the MERGE_CRC macro defined in
* crcspeed.c (which calls crc_combine()), and is a specialization of the
* generic crc_combine() (and related from the 2013 edition of Mark Adler's
* crc64.c)) for the sake of clarity and performance.
This software is provided 'as-is', without any express or implied
warranty. In no event will the author be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Mark Adler
madler@alumni.caltech.edu
*/
#define STATIC_ASSERT(VVV) do {int test = 1 / (VVV);test++;} while (0)
#if !((defined(__i386__) || defined(__X86_64__)))
/* This cuts 40% of the time vs bit-by-bit. */
uint64_t gf2_matrix_times_switch(uint64_t *mat, uint64_t vec) {
/*
* Without using any vector math, this handles 4 bits at a time,
* and saves 40+% of the time compared to the bit-by-bit version. Use if you
* have no vector compile option available to you. With cache, we see:
* E5-2670 ~1-2us to extend ~1 meg 64 bit hash
*/
uint64_t sum;
sum = 0;
while (vec) {
/* reversing the case order is ~10% slower on Xeon E5-2670 */
switch (vec & 15) {
case 15:
sum ^= *mat ^ *(mat+1) ^ *(mat+2) ^ *(mat+3);
break;
case 14:
sum ^= *(mat+1) ^ *(mat+2) ^ *(mat+3);
break;
case 13:
sum ^= *mat ^ *(mat+2) ^ *(mat+3);
break;
case 12:
sum ^= *(mat+2) ^ *(mat+3);
break;
case 11:
sum ^= *mat ^ *(mat+1) ^ *(mat+3);
break;
case 10:
sum ^= *(mat+1) ^ *(mat+3);
break;
case 9:
sum ^= *mat ^ *(mat+3);
break;
case 8:
sum ^= *(mat+3);
break;
case 7:
sum ^= *mat ^ *(mat+1) ^ *(mat+2);
break;
case 6:
sum ^= *(mat+1) ^ *(mat+2);
break;
case 5:
sum ^= *mat ^ *(mat+2);
break;
case 4:
sum ^= *(mat+2);
break;
case 3:
sum ^= *mat ^ *(mat+1);
break;
case 2:
sum ^= *(mat+1);
break;
case 1:
sum ^= *mat;
break;
default:
break;
}
vec >>= 4;
mat += 4;
}
return sum;
}
#define CRC_MULTIPLY gf2_matrix_times_switch
#else
/*
Warning: here there be dragons involving vector math, and macros to save us
from repeating the same information over and over.
*/
uint64_t gf2_matrix_times_vec2(uint64_t *mat, uint64_t vec) {
/*
* Uses xmm registers on x86, works basically everywhere fast, doing
* cycles of movqda, mov, shr, pand, and, pxor, at least on gcc 8.
* Is 9-11x faster than original.
* E5-2670 ~29us to extend ~1 meg 64 bit hash
* i3-8130U ~22us to extend ~1 meg 64 bit hash
*/
v2uq sum = {0, 0},
*mv2 = (v2uq*)mat;
/* this table allows us to eliminate conditions during gf2_matrix_times_vec2() */
static v2uq masks2[4] = {
{0,0},
{-1,0},
{0,-1},
{-1,-1},
};
/* Almost as beautiful as gf2_matrix_times_vec, but only half as many
* bits per step, so we need 2 per chunk4 operation. Faster in my tests. */
#define DO_CHUNK4() \
sum ^= (*mv2++) & masks2[vec & 3]; \
vec >>= 2; \
sum ^= (*mv2++) & masks2[vec & 3]; \
vec >>= 2
#define DO_CHUNK16() \
DO_CHUNK4(); \
DO_CHUNK4(); \
DO_CHUNK4(); \
DO_CHUNK4()
DO_CHUNK16();
DO_CHUNK16();
DO_CHUNK16();
DO_CHUNK16();
STATIC_ASSERT(sizeof(uint64_t) == 8);
STATIC_ASSERT(sizeof(long long unsigned int) == 8);
return sum[0] ^ sum[1];
}
#undef DO_CHUNK16
#undef DO_CHUNK4
#define CRC_MULTIPLY gf2_matrix_times_vec2
#endif
static void gf2_matrix_square(uint64_t *square, uint64_t *mat, uint8_t dim) {
unsigned n;
for (n = 0; n < dim; n++)
square[n] = CRC_MULTIPLY(mat, mat[n]);
}
/* Turns out our Redis / Jones CRC cycles at this point, so we can support
* more than 64 bits of extension if we want. Trivially. */
static uint64_t combine_cache[64][64];
/* Mark Adler has some amazing updates to crc.c in his crcany repository. I
* like static caches, and not worrying about finding cycles generally. We are
* okay to spend the 32k of memory here, leaving the algorithm unchanged from
* as it was a decade ago, and be happy that it costs <200 microseconds to
* init, and that subsequent calls to the combine function take under 100
* nanoseconds. We also note that the crcany/crc.c code applies to any CRC, and
* we are currently targeting one: Jones CRC64.
*/
void init_combine_cache(uint64_t poly, uint8_t dim) {
unsigned n, cache_num = 0;
combine_cache[1][0] = poly;
int prev = 1;
uint64_t row = 1;
for (n = 1; n < dim; n++)
{
combine_cache[1][n] = row;
row <<= 1;
}
gf2_matrix_square(combine_cache[0], combine_cache[1], dim);
gf2_matrix_square(combine_cache[1], combine_cache[0], dim);
/* do/while to overwrite the first two layers, they are not used, but are
* re-generated in the last two layers for the Redis polynomial */
do {
gf2_matrix_square(combine_cache[cache_num], combine_cache[cache_num + prev], dim);
prev = -1;
} while (++cache_num < 64);
}
/* Return the CRC-64 of two sequential blocks, where crc1 is the CRC-64 of the
* first block, crc2 is the CRC-64 of the second block, and len2 is the length
* of the second block.
*
* If you want reflections on your CRCs; do them outside before / after.
* WARNING: if you enable USE_STATIC_COMBINE_CACHE to make this fast, you MUST
* ALWAYS USE THE SAME POLYNOMIAL, otherwise you will get the wrong results.
* You MAY bzero() the even/odd static arrays, which will induce a re-cache on
* next call as a work-around, but ... maybe just parameterize the cached
* models at that point like Mark Adler does in modern crcany/crc.c .
*/
uint64_t crc64_combine(uint64_t crc1, uint64_t crc2, uintmax_t len2, uint64_t poly, uint8_t dim) {
/* degenerate case */
if (len2 == 0)
return crc1;
unsigned cache_num = 0;
if (combine_cache[0][0] == 0) {
init_combine_cache(poly, dim);
}
/* apply len2 zeros to crc1 (first square will put the operator for one
zero byte, eight zero bits, in even) */
do
{
/* apply zeros operator for this bit of len2 */
if (len2 & 1)
crc1 = CRC_MULTIPLY(combine_cache[cache_num], crc1);
len2 >>= 1;
cache_num = (cache_num + 1) & 63;
/* if no more bits set, then done */
} while (len2 != 0);
/* return combined crc */
crc1 ^= crc2;
return crc1;
}
#undef CRC_MULTIPLY

10
src/crccombine.h Normal file
View File

@ -0,0 +1,10 @@
#include <stdint.h>
/* mask types */
typedef unsigned long long v2uq __attribute__ ((vector_size (16)));
uint64_t gf2_matrix_times_vec2(uint64_t *mat, uint64_t vec);
void init_combine_cache(uint64_t poly, uint8_t dim);
uint64_t crc64_combine(uint64_t crc1, uint64_t crc2, uintmax_t len2, uint64_t poly, uint8_t dim);

View File

@ -1,11 +1,21 @@
/* /*
* Copyright (C) 2013 Mark Adler * Copyright (C) 2013 Mark Adler
* Copyright (C) 2019-2024 Josiah Carlson
* Originally by: crc64.c Version 1.4 16 Dec 2013 Mark Adler * Originally by: crc64.c Version 1.4 16 Dec 2013 Mark Adler
* Modifications by Matt Stancliff <matt@genges.com>: * Modifications by Matt Stancliff <matt@genges.com>:
* - removed CRC64-specific behavior * - removed CRC64-specific behavior
* - added generation of lookup tables by parameters * - added generation of lookup tables by parameters
* - removed inversion of CRC input/result * - removed inversion of CRC input/result
* - removed automatic initialization in favor of explicit initialization * - removed automatic initialization in favor of explicit initialization
* Modifications by Josiah Carlson <josiah.carlson@gmail.com>
* - Added case/vector/AVX/+ versions of crc combine function; see crccombine.c
* - added optional static cache
* - Modified to use 1 thread to:
* - Partition large crc blobs into 2-3 segments
* - Process the 2-3 segments in parallel
* - Merge the resulting crcs
* -> Resulting in 10-90% performance boost for data > 1 meg
* - macro-ized to reduce copy/pasta
This software is provided 'as-is', without any express or implied This software is provided 'as-is', without any express or implied
warranty. In no event will the author be held liable for any damages warranty. In no event will the author be held liable for any damages
@ -28,6 +38,10 @@
*/ */
#include "crcspeed.h" #include "crcspeed.h"
#include "crccombine.h"
#define CRC64_LEN_MASK UINT64_C(0x7ffffffffffffff8)
#define CRC64_REVERSED_POLY UINT64_C(0x95ac9329ac4bc9b5)
/* Fill in a CRC constants table. */ /* Fill in a CRC constants table. */
void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) { void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) {
@ -39,7 +53,7 @@ void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) {
table[0][n] = crcfn(0, &v, 1); table[0][n] = crcfn(0, &v, 1);
} }
/* generate nested CRC table for future slice-by-8 lookup */ /* generate nested CRC table for future slice-by-8/16/24+ lookup */
for (int n = 0; n < 256; n++) { for (int n = 0; n < 256; n++) {
crc = table[0][n]; crc = table[0][n];
for (int k = 1; k < 8; k++) { for (int k = 1; k < 8; k++) {
@ -47,6 +61,10 @@ void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) {
table[k][n] = crc; table[k][n] = crc;
} }
} }
#if USE_STATIC_COMBINE_CACHE
/* initialize combine cache for CRC stapling for slice-by 16/24+ */
init_combine_cache(CRC64_REVERSED_POLY, 64);
#endif
} }
void crcspeed16little_init(crcfn16 crcfn, uint16_t table[8][256]) { void crcspeed16little_init(crcfn16 crcfn, uint16_t table[8][256]) {
@ -104,45 +122,151 @@ void crcspeed16big_init(crcfn16 fn, uint16_t big_table[8][256]) {
} }
} }
/* Note: doing all of our crc/next modifications *before* the crc table
* references is an absolute speedup on all CPUs tested. So... keep these
* macros separate.
*/
#define DO_8_1(crc, next) \
crc ^= *(uint64_t *)next; \
next += 8
#define DO_8_2(crc) \
crc = little_table[7][(uint8_t)crc] ^ \
little_table[6][(uint8_t)(crc >> 8)] ^ \
little_table[5][(uint8_t)(crc >> 16)] ^ \
little_table[4][(uint8_t)(crc >> 24)] ^ \
little_table[3][(uint8_t)(crc >> 32)] ^ \
little_table[2][(uint8_t)(crc >> 40)] ^ \
little_table[1][(uint8_t)(crc >> 48)] ^ \
little_table[0][crc >> 56]
#define CRC64_SPLIT(div) \
olen = len; \
next2 = next1 + ((len / div) & CRC64_LEN_MASK); \
len = (next2 - next1)
#define MERGE_CRC(crcn) \
crc1 = crc64_combine(crc1, crcn, next2 - next1, CRC64_REVERSED_POLY, 64)
#define MERGE_END(last, DIV) \
len = olen - ((next2 - next1) * DIV); \
next1 = last
/* Variables so we can change for benchmarking; these seem to be fairly
* reasonable for Intel CPUs made since 2010. Please adjust as necessary if
* or when your CPU has more load / execute units. We've written benchmark code
* to help you tune your platform, see crc64Test. */
#if defined(__i386__) || defined(__X86_64__)
static size_t CRC64_TRI_CUTOFF = (2*1024);
static size_t CRC64_DUAL_CUTOFF = (128);
#else
static size_t CRC64_TRI_CUTOFF = (16*1024);
static size_t CRC64_DUAL_CUTOFF = (1024);
#endif
void set_crc64_cutoffs(size_t dual_cutoff, size_t tri_cutoff) {
CRC64_DUAL_CUTOFF = dual_cutoff;
CRC64_TRI_CUTOFF = tri_cutoff;
}
/* Calculate a non-inverted CRC multiple bytes at a time on a little-endian /* Calculate a non-inverted CRC multiple bytes at a time on a little-endian
* architecture. If you need inverted CRC, invert *before* calling and invert * architecture. If you need inverted CRC, invert *before* calling and invert
* *after* calling. * *after* calling.
* 64 bit crc = process 8 bytes at once; * 64 bit crc = process 8/16/24 bytes at once;
*/ */
uint64_t crcspeed64little(uint64_t little_table[8][256], uint64_t crc, uint64_t crcspeed64little(uint64_t little_table[8][256], uint64_t crc1,
void *buf, size_t len) { void *buf, size_t len) {
unsigned char *next = buf; unsigned char *next1 = buf;
if (CRC64_DUAL_CUTOFF < 1) {
goto final;
}
/* process individual bytes until we reach an 8-byte aligned pointer */ /* process individual bytes until we reach an 8-byte aligned pointer */
while (len && ((uintptr_t)next & 7) != 0) { while (len && ((uintptr_t)next1 & 7) != 0) {
crc = little_table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8); crc1 = little_table[0][(crc1 ^ *next1++) & 0xff] ^ (crc1 >> 8);
len--; len--;
} }
/* fast middle processing, 8 bytes (aligned!) per loop */ if (len > CRC64_TRI_CUTOFF) {
while (len >= 8) { /* 24 bytes per loop, doing 3 parallel 8 byte chunks at a time */
crc ^= *(uint64_t *)next; unsigned char *next2, *next3;
crc = little_table[7][crc & 0xff] ^ uint64_t olen, crc2=0, crc3=0;
little_table[6][(crc >> 8) & 0xff] ^ CRC64_SPLIT(3);
little_table[5][(crc >> 16) & 0xff] ^ /* len is now the length of the first segment, the 3rd segment possibly
little_table[4][(crc >> 24) & 0xff] ^ * having extra bytes to clean up at the end
little_table[3][(crc >> 32) & 0xff] ^ */
little_table[2][(crc >> 40) & 0xff] ^ next3 = next2 + len;
little_table[1][(crc >> 48) & 0xff] ^ while (len >= 8) {
little_table[0][crc >> 56]; len -= 8;
next += 8; DO_8_1(crc1, next1);
len -= 8; DO_8_1(crc2, next2);
} DO_8_1(crc3, next3);
DO_8_2(crc1);
DO_8_2(crc2);
DO_8_2(crc3);
}
/* merge the 3 crcs */
MERGE_CRC(crc2);
MERGE_CRC(crc3);
MERGE_END(next3, 3);
} else if (len > CRC64_DUAL_CUTOFF) {
/* 16 bytes per loop, doing 2 parallel 8 byte chunks at a time */
unsigned char *next2;
uint64_t olen, crc2=0;
CRC64_SPLIT(2);
/* len is now the length of the first segment, the 2nd segment possibly
* having extra bytes to clean up at the end
*/
while (len >= 8) {
len -= 8;
DO_8_1(crc1, next1);
DO_8_1(crc2, next2);
DO_8_2(crc1);
DO_8_2(crc2);
}
/* merge the 2 crcs */
MERGE_CRC(crc2);
MERGE_END(next2, 2);
}
/* We fall through here to handle our <CRC64_DUAL_CUTOFF inputs, and for any trailing
* bytes that wasn't evenly divisble by 16 or 24 above. */
/* fast processing, 8 bytes (aligned!) per loop */
while (len >= 8) {
len -= 8;
DO_8_1(crc1, next1);
DO_8_2(crc1);
}
final:
/* process remaining bytes (can't be larger than 8) */ /* process remaining bytes (can't be larger than 8) */
while (len) { while (len) {
crc = little_table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8); crc1 = little_table[0][(crc1 ^ *next1++) & 0xff] ^ (crc1 >> 8);
len--; len--;
} }
return crc; return crc1;
} }
/* clean up our namespace */
#undef DO_8_1
#undef DO_8_2
#undef CRC64_SPLIT
#undef MERGE_CRC
#undef MERGE_END
#undef CRC64_REVERSED_POLY
#undef CRC64_LEN_MASK
/* note: similar perf advantages can be had for long strings in crc16 using all
* of the same optimizations as above; though this is unnecessary. crc16 is
* normally used to shard keys; not hash / verify data, so is used on shorter
* data that doesn't warrant such changes. */
uint16_t crcspeed16little(uint16_t little_table[8][256], uint16_t crc, uint16_t crcspeed16little(uint16_t little_table[8][256], uint16_t crc,
void *buf, size_t len) { void *buf, size_t len) {
unsigned char *next = buf; unsigned char *next = buf;
@ -190,6 +314,10 @@ uint64_t crcspeed64big(uint64_t big_table[8][256], uint64_t crc, void *buf,
len--; len--;
} }
/* note: alignment + 2/3-way processing can probably be handled here nearly
the same as above, using our updated DO_8_2 macro. Not included in these
changes, as other authors, I don't have big-endian to test with. */
while (len >= 8) { while (len >= 8) {
crc ^= *(uint64_t *)next; crc ^= *(uint64_t *)next;
crc = big_table[0][crc & 0xff] ^ crc = big_table[0][crc & 0xff] ^

View File

@ -34,6 +34,8 @@
typedef uint64_t (*crcfn64)(uint64_t, const void *, const uint64_t); typedef uint64_t (*crcfn64)(uint64_t, const void *, const uint64_t);
typedef uint16_t (*crcfn16)(uint16_t, const void *, const uint64_t); typedef uint16_t (*crcfn16)(uint16_t, const void *, const uint64_t);
void set_crc64_cutoffs(size_t dual_cutoff, size_t tri_cutoff);
/* CRC-64 */ /* CRC-64 */
void crcspeed64little_init(crcfn64 fn, uint64_t table[8][256]); void crcspeed64little_init(crcfn64 fn, uint64_t table[8][256]);
void crcspeed64big_init(crcfn64 fn, uint64_t table[8][256]); void crcspeed64big_init(crcfn64 fn, uint64_t table[8][256]);

View File

@ -21,6 +21,8 @@
* C-level DB API * C-level DB API
*----------------------------------------------------------------------------*/ *----------------------------------------------------------------------------*/
static_assert(MAX_KEYSIZES_TYPES == OBJ_TYPE_BASIC_MAX, "Must be equal");
/* Flags for expireIfNeeded */ /* Flags for expireIfNeeded */
#define EXPIRE_FORCE_DELETE_EXPIRED 1 #define EXPIRE_FORCE_DELETE_EXPIRED 1
#define EXPIRE_AVOID_DELETE_EXPIRED 2 #define EXPIRE_AVOID_DELETE_EXPIRED 2
@ -46,6 +48,48 @@ void updateLFU(robj *val) {
val->lru = (LFUGetTimeInMinutes()<<8) | counter; val->lru = (LFUGetTimeInMinutes()<<8) | counter;
} }
/*
* Update histogram of keys-sizes
*
* It is used to track the distribution of key sizes in the dataset. It is updated
* every time key's length is modified. Available to user via INFO command.
*
* The histogram is a base-2 logarithmic histogram, with 64 bins. The i'th bin
* represents the number of keys with a size in the range 2^i and 2^(i+1)
* exclusive. oldLen/newLen must be smaller than 2^48, and if their value
* equals 0, it means that the key is being created/deleted, respectively. Each
* data type has its own histogram and it is per database (In addition, there is
* histogram per slot for future cluster use).
*
* Examples to LEN values and corresponding bins in histogram:
* [1,2)->0 [2,4)->1 [4,8)->2 [8,16)->3
*/
void updateKeysizesHist(redisDb *db, int didx, uint32_t type, uint64_t oldLen, uint64_t newLen) {
if(unlikely(type >= OBJ_TYPE_BASIC_MAX))
return;
kvstoreDictMetadata *dictMeta = kvstoreGetDictMetadata(db->keys, didx);
kvstoreMetadata *kvstoreMeta = kvstoreGetMetadata(db->keys);
if (oldLen != 0) {
int old_bin = log2ceil(oldLen);
debugServerAssertWithInfo(server.current_client, NULL, old_bin < MAX_KEYSIZES_BINS);
/* If following a key deletion it is last one in slot's dict, then
* slot's dict might get released as well. Verify if metadata is not NULL. */
if(dictMeta) dictMeta->keysizes_hist[type][old_bin]--;
kvstoreMeta->keysizes_hist[type][old_bin]--;
}
if (newLen != 0) {
int new_bin = log2ceil(newLen);
debugServerAssertWithInfo(server.current_client, NULL, new_bin < MAX_KEYSIZES_BINS);
/* If following a key deletion it is last one in slot's dict, then
* slot's dict might get released as well. Verify if metadata is not NULL. */
if(dictMeta) dictMeta->keysizes_hist[type][new_bin]++;
kvstoreMeta->keysizes_hist[type][new_bin]++;
}
}
/* Lookup a key for read or write operations, or return NULL if the key is not /* Lookup a key for read or write operations, or return NULL if the key is not
* found in the specified DB. This function implements the functionality of * found in the specified DB. This function implements the functionality of
* lookupKeyRead(), lookupKeyWrite() and their ...WithFlags() variants. * lookupKeyRead(), lookupKeyWrite() and their ...WithFlags() variants.
@ -205,6 +249,7 @@ static dictEntry *dbAddInternal(redisDb *db, robj *key, robj *val, int update_if
kvstoreDictSetVal(db->keys, slot, de, val); kvstoreDictSetVal(db->keys, slot, de, val);
signalKeyAsReady(db, key, val->type); signalKeyAsReady(db, key, val->type);
notifyKeyspaceEvent(NOTIFY_NEW,"new",key,db->id); notifyKeyspaceEvent(NOTIFY_NEW,"new",key,db->id);
updateKeysizesHist(db, slot, val->type, 0, getObjectLength(val)); /* add hist */
return de; return de;
} }
@ -250,6 +295,7 @@ int dbAddRDBLoad(redisDb *db, sds key, robj *val) {
int slot = getKeySlot(key); int slot = getKeySlot(key);
dictEntry *de = kvstoreDictAddRaw(db->keys, slot, key, NULL); dictEntry *de = kvstoreDictAddRaw(db->keys, slot, key, NULL);
if (de == NULL) return 0; if (de == NULL) return 0;
updateKeysizesHist(db, slot, val->type, 0, getObjectLength(val)); /* add hist */
initObjectLRUOrLFU(val); initObjectLRUOrLFU(val);
kvstoreDictSetVal(db->keys, slot, de, val); kvstoreDictSetVal(db->keys, slot, de, val);
return 1; return 1;
@ -273,6 +319,9 @@ static void dbSetValue(redisDb *db, robj *key, robj *val, int overwrite, dictEnt
serverAssertWithInfo(NULL,key,de != NULL); serverAssertWithInfo(NULL,key,de != NULL);
robj *old = dictGetVal(de); robj *old = dictGetVal(de);
/* Remove old key from keysizes histogram */
updateKeysizesHist(db, slot, old->type, getObjectLength(old), 0); /* remove hist */
val->lru = old->lru; val->lru = old->lru;
if (overwrite) { if (overwrite) {
@ -291,6 +340,9 @@ static void dbSetValue(redisDb *db, robj *key, robj *val, int overwrite, dictEnt
} }
kvstoreDictSetVal(db->keys, slot, de, val); kvstoreDictSetVal(db->keys, slot, de, val);
/* Add new key to keysizes histogram */
updateKeysizesHist(db, slot, val->type, 0, getObjectLength(val));
/* if hash with HFEs, take care to remove from global HFE DS */ /* if hash with HFEs, take care to remove from global HFE DS */
if (old->type == OBJ_HASH) if (old->type == OBJ_HASH)
hashTypeRemoveFromExpires(&db->hexpires, old); hashTypeRemoveFromExpires(&db->hexpires, old);
@ -404,6 +456,9 @@ int dbGenericDelete(redisDb *db, robj *key, int async, int flags) {
if (de) { if (de) {
robj *val = dictGetVal(de); robj *val = dictGetVal(de);
/* remove key from histogram */
updateKeysizesHist(db, slot, val->type, getObjectLength(val), 0);
/* If hash object with expiry on fields, remove it from HFE DS of DB */ /* If hash object with expiry on fields, remove it from HFE DS of DB */
if (val->type == OBJ_HASH) if (val->type == OBJ_HASH)
hashTypeRemoveFromExpires(&db->hexpires, val); hashTypeRemoveFromExpires(&db->hexpires, val);
@ -599,7 +654,8 @@ redisDb *initTempDb(void) {
redisDb *tempDb = zcalloc(sizeof(redisDb)*server.dbnum); redisDb *tempDb = zcalloc(sizeof(redisDb)*server.dbnum);
for (int i=0; i<server.dbnum; i++) { for (int i=0; i<server.dbnum; i++) {
tempDb[i].id = i; tempDb[i].id = i;
tempDb[i].keys = kvstoreCreate(&dbDictType, slot_count_bits, flags); tempDb[i].keys = kvstoreCreate(&dbDictType, slot_count_bits,
flags | KVSTORE_ALLOC_META_KEYS_HIST);
tempDb[i].expires = kvstoreCreate(&dbExpiresDictType, slot_count_bits, flags); tempDb[i].expires = kvstoreCreate(&dbExpiresDictType, slot_count_bits, flags);
tempDb[i].hexpires = ebCreate(); tempDb[i].hexpires = ebCreate();
} }
@ -1200,7 +1256,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) {
* The exception to the above is ZSET, where we do allocate temporary * The exception to the above is ZSET, where we do allocate temporary
* strings even when scanning a dict. */ * strings even when scanning a dict. */
if (o && (!ht || o->type == OBJ_ZSET)) { if (o && (!ht || o->type == OBJ_ZSET)) {
listSetFreeMethod(keys, (void (*)(void*))sdsfree); listSetFreeMethod(keys, sdsfreegeneric);
} }
/* For main dictionary scan or data structure using hashtable. */ /* For main dictionary scan or data structure using hashtable. */

View File

@ -2,6 +2,9 @@
* Copyright (c) 2009-Present, Redis Ltd. * Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved. * All rights reserved.
* *
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0 * Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1). * (RSALv2) or the Server Side Public License v1 (SSPLv1).
* *
@ -483,6 +486,8 @@ void debugCommand(client *c) {
" In case RESET is provided the peak reset time will be restored to the default value", " In case RESET is provided the peak reset time will be restored to the default value",
"REPLYBUFFER RESIZING <0|1>", "REPLYBUFFER RESIZING <0|1>",
" Enable or disable the reply buffer resize cron job", " Enable or disable the reply buffer resize cron job",
"REPL-PAUSE <clear|after-fork|before-rdb-channel|on-streaming-repl-buf>",
" Pause the server's main process during various replication steps.",
"DICT-RESIZING <0|1>", "DICT-RESIZING <0|1>",
" Enable or disable the main dict and expire dict resizing.", " Enable or disable the main dict and expire dict resizing.",
"SCRIPT <LIST|<sha>>", "SCRIPT <LIST|<sha>>",
@ -1018,6 +1023,20 @@ NULL
return; return;
} }
addReply(c, shared.ok); addReply(c, shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr, "repl-pause") && c->argc == 3) {
if (!strcasecmp(c->argv[2]->ptr, "clear")) {
server.repl_debug_pause = REPL_DEBUG_PAUSE_NONE;
} else if (!strcasecmp(c->argv[2]->ptr,"after-fork")) {
server.repl_debug_pause |= REPL_DEBUG_AFTER_FORK;
} else if (!strcasecmp(c->argv[2]->ptr,"before-rdb-channel")) {
server.repl_debug_pause |= REPL_DEBUG_BEFORE_RDB_CHANNEL;
} else if (!strcasecmp(c->argv[2]->ptr, "on-streaming-repl-buf")) {
server.repl_debug_pause |= REPL_DEBUG_ON_STREAMING_REPL_BUF;
} else {
addReplySubcommandSyntaxError(c);
return;
}
addReply(c, shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr, "dict-resizing") && c->argc == 3) { } else if (!strcasecmp(c->argv[1]->ptr, "dict-resizing") && c->argc == 3) {
server.dict_resizing = atoi(c->argv[2]->ptr); server.dict_resizing = atoi(c->argv[2]->ptr);
addReply(c, shared.ok); addReply(c, shared.ok);
@ -1052,6 +1071,46 @@ NULL
/* =========================== Crash handling ============================== */ /* =========================== Crash handling ============================== */
/* When hide-user-data-from-log is enabled, to avoid leaking user info, we only
* print tokens of the current command into the log. First, we collect command
* tokens into this struct (Commands tokens are defined in json schema). Later,
* checking each argument against the token list. */
#define CMD_TOKEN_MAX_COUNT 128 /* Max token count in a command's json schema */
struct cmdToken {
const char *tokens[CMD_TOKEN_MAX_COUNT];
int n_token;
};
/* Collect tokens from command arguments recursively. */
static void cmdTokenCollect(struct cmdToken *tk, redisCommandArg *args, int argc) {
if (args == NULL)
return;
for (int i = 0; i < argc && tk->n_token < CMD_TOKEN_MAX_COUNT; i++) {
if (args[i].token)
tk->tokens[tk->n_token++] = args[i].token;
cmdTokenCollect(tk, args[i].subargs, args[i].num_args);
}
}
/* Get tokens of the command. */
static void cmdTokenGetFromCommand(struct cmdToken *tk, struct redisCommand *cmd) {
tk->n_token = 0;
cmdTokenCollect(tk, cmd->args, cmd->num_args);
}
/* Check if object is one of command's tokens. */
static int cmdTokenCheck(struct cmdToken *tk, robj *o) {
if (o->type != OBJ_STRING || !sdsEncodedObject(o))
return 0;
for (int i = 0; i < tk->n_token; i++) {
if (strcasecmp(tk->tokens[i], o->ptr) == 0)
return 1;
}
return 0;
}
__attribute__ ((noinline)) __attribute__ ((noinline))
void _serverAssert(const char *estr, const char *file, int line) { void _serverAssert(const char *estr, const char *file, int line) {
int new_report = bugReportStart(); int new_report = bugReportStart();
@ -1072,28 +1131,35 @@ void _serverAssert(const char *estr, const char *file, int line) {
bugReportEnd(0, 0); bugReportEnd(0, 0);
} }
/* Returns the amount of client's command arguments we allow logging */
int clientArgsToLog(const client *c) {
return server.hide_user_data_from_log ? 1 : c->argc;
}
void _serverAssertPrintClientInfo(const client *c) { void _serverAssertPrintClientInfo(const client *c) {
int j; int j;
char conninfo[CONN_INFO_LEN]; char conninfo[CONN_INFO_LEN];
struct redisCommand *cmd = NULL;
struct cmdToken tokens = {{0}};
bugReportStart(); bugReportStart();
serverLog(LL_WARNING,"=== ASSERTION FAILED CLIENT CONTEXT ==="); serverLog(LL_WARNING,"=== ASSERTION FAILED CLIENT CONTEXT ===");
serverLog(LL_WARNING,"client->flags = %llu", (unsigned long long) c->flags); serverLog(LL_WARNING,"client->flags = %llu", (unsigned long long) c->flags);
serverLog(LL_WARNING,"client->conn = %s", connGetInfo(c->conn, conninfo, sizeof(conninfo))); serverLog(LL_WARNING,"client->conn = %s", connGetInfo(c->conn, conninfo, sizeof(conninfo)));
serverLog(LL_WARNING,"client->argc = %d", c->argc); serverLog(LL_WARNING,"client->argc = %d", c->argc);
if (server.hide_user_data_from_log) {
cmd = lookupCommand(c->argv, c->argc);
if (cmd)
cmdTokenGetFromCommand(&tokens, cmd);
}
for (j=0; j < c->argc; j++) { for (j=0; j < c->argc; j++) {
if (j >= clientArgsToLog(c)) {
serverLog(LL_WARNING,"client->argv[%d] = *redacted*",j);
continue;
}
char buf[128]; char buf[128];
char *arg; char *arg;
/* Allow command name, subcommand name and command tokens in the log. */
if (server.hide_user_data_from_log && (j != 0 && !(j == 1 && cmd && cmd->parent))) {
if (!cmdTokenCheck(&tokens, c->argv[j])) {
serverLog(LL_WARNING, "client->argv[%d] = *redacted*", j);
continue;
}
}
if (c->argv[j]->type == OBJ_STRING && sdsEncodedObject(c->argv[j])) { if (c->argv[j]->type == OBJ_STRING && sdsEncodedObject(c->argv[j])) {
arg = (char*) c->argv[j]->ptr; arg = (char*) c->argv[j]->ptr;
} else { } else {
@ -2061,16 +2127,27 @@ void logCurrentClient(client *cc, const char *title) {
sds client; sds client;
int j; int j;
struct redisCommand *cmd = NULL;
struct cmdToken tokens = {{0}};
serverLog(LL_WARNING|LL_RAW, "\n------ %s CLIENT INFO ------\n", title); serverLog(LL_WARNING|LL_RAW, "\n------ %s CLIENT INFO ------\n", title);
client = catClientInfoString(sdsempty(),cc); client = catClientInfoString(sdsempty(),cc);
serverLog(LL_WARNING|LL_RAW,"%s\n", client); serverLog(LL_WARNING|LL_RAW,"%s\n", client);
sdsfree(client); sdsfree(client);
serverLog(LL_WARNING|LL_RAW,"argc: '%d'\n", cc->argc); serverLog(LL_WARNING|LL_RAW,"argc: '%d'\n", cc->argc);
if (server.hide_user_data_from_log) {
cmd = lookupCommand(cc->argv, cc->argc);
if (cmd)
cmdTokenGetFromCommand(&tokens, cmd);
}
for (j = 0; j < cc->argc; j++) { for (j = 0; j < cc->argc; j++) {
if (j >= clientArgsToLog(cc)) { /* Allow command name, subcommand name and command tokens in the log. */
serverLog(LL_WARNING|LL_RAW,"argv[%d]: *redacted*\n",j); if (server.hide_user_data_from_log && (j != 0 && !(j == 1 && cmd && cmd->parent))) {
continue; if (!cmdTokenCheck(&tokens, cc->argv[j])) {
serverLog(LL_WARNING|LL_RAW, "argv[%d]: '*redacted*'\n", j);
continue;
}
} }
robj *decoded; robj *decoded;
decoded = getDecodedObject(cc->argv[j]); decoded = getDecodedObject(cc->argv[j]);
@ -2393,6 +2470,8 @@ void removeSigSegvHandlers(void) {
} }
void printCrashReport(void) { void printCrashReport(void) {
server.crashing = 1;
/* Log INFO and CLIENT LIST */ /* Log INFO and CLIENT LIST */
logServerInfo(); logServerInfo();
@ -2523,6 +2602,12 @@ void applyWatchdogPeriod(void) {
} }
} }
void debugPauseProcess(void) {
serverLog(LL_NOTICE, "Process is about to stop.");
raise(SIGSTOP);
serverLog(LL_NOTICE, "Process has been continued.");
}
/* Positive input is sleep time in microseconds. Negative input is fractions /* Positive input is sleep time in microseconds. Negative input is fractions
* of microseconds, i.e. -10 means 100 nanoseconds. */ * of microseconds, i.e. -10 means 100 nanoseconds. */
void debugDelay(int usec) { void debugDelay(int usec) {

View File

@ -296,7 +296,7 @@ void activeDefragHfieldDictCallback(void *privdata, const dictEntry *de) {
dictUseStoredKeyApi(d, 1); dictUseStoredKeyApi(d, 1);
uint64_t hash = dictGetHash(d, newhf); uint64_t hash = dictGetHash(d, newhf);
dictUseStoredKeyApi(d, 0); dictUseStoredKeyApi(d, 0);
dictEntry *de = dictFindEntryByPtrAndHash(d, hf, hash); dictEntry *de = dictFindByHashAndPtr(d, hf, hash);
serverAssert(de); serverAssert(de);
dictSetKey(d, de, newhf); dictSetKey(d, de, newhf);
} }
@ -729,8 +729,9 @@ void defragStream(redisDb *db, dictEntry *kde) {
void defragModule(redisDb *db, dictEntry *kde) { void defragModule(redisDb *db, dictEntry *kde) {
robj *obj = dictGetVal(kde); robj *obj = dictGetVal(kde);
serverAssert(obj->type == OBJ_MODULE); serverAssert(obj->type == OBJ_MODULE);
robj keyobj;
if (!moduleDefragValue(dictGetKey(kde), obj, db->id)) initStaticStringObject(keyobj, dictGetKey(kde));
if (!moduleDefragValue(&keyobj, obj, db->id))
defragLater(db, kde); defragLater(db, kde);
} }
@ -752,7 +753,7 @@ void defragKey(defragCtx *ctx, dictEntry *de) {
* the pointer it holds, since it won't be able to do the string * the pointer it holds, since it won't be able to do the string
* compare, but we can find the entry using key hash and pointer. */ * compare, but we can find the entry using key hash and pointer. */
uint64_t hash = kvstoreGetHash(db->expires, newsds); uint64_t hash = kvstoreGetHash(db->expires, newsds);
dictEntry *expire_de = kvstoreDictFindEntryByPtrAndHash(db->expires, slot, keysds, hash); dictEntry *expire_de = kvstoreDictFindByHashAndPtr(db->expires, slot, keysds, hash);
if (expire_de) kvstoreDictSetKey(db->expires, slot, expire_de, newsds); if (expire_de) kvstoreDictSetKey(db->expires, slot, expire_de, newsds);
} }
@ -940,7 +941,9 @@ int defragLaterItem(dictEntry *de, unsigned long *cursor, long long endtime, int
} else if (ob->type == OBJ_STREAM) { } else if (ob->type == OBJ_STREAM) {
return scanLaterStreamListpacks(ob, cursor, endtime); return scanLaterStreamListpacks(ob, cursor, endtime);
} else if (ob->type == OBJ_MODULE) { } else if (ob->type == OBJ_MODULE) {
return moduleLateDefrag(dictGetKey(de), ob, cursor, endtime, dbid); robj keyobj;
initStaticStringObject(keyobj, dictGetKey(de));
return moduleLateDefrag(&keyobj, ob, cursor, endtime, dbid);
} else { } else {
*cursor = 0; /* object type may have changed since we schedule it for later */ *cursor = 0; /* object type may have changed since we schedule it for later */
} }

View File

@ -62,6 +62,7 @@ typedef struct {
static void _dictExpandIfNeeded(dict *d); static void _dictExpandIfNeeded(dict *d);
static void _dictShrinkIfNeeded(dict *d); static void _dictShrinkIfNeeded(dict *d);
static void _dictRehashStepIfNeeded(dict *d, uint64_t visitedIdx);
static signed char _dictNextExp(unsigned long size); static signed char _dictNextExp(unsigned long size);
static int _dictInit(dict *d, dictType *type); static int _dictInit(dict *d, dictType *type);
static dictEntry *dictGetNext(const dictEntry *de); static dictEntry *dictGetNext(const dictEntry *de);
@ -119,14 +120,16 @@ uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) {
* pointer actually points to. If the least bit is set, it's a key. Otherwise, * pointer actually points to. If the least bit is set, it's a key. Otherwise,
* the bit pattern of the least 3 significant bits mark the kind of entry. */ * the bit pattern of the least 3 significant bits mark the kind of entry. */
#define ENTRY_PTR_MASK 7 /* 111 */ #define ENTRY_PTR_MASK 7 /* 111 */
#define ENTRY_PTR_NORMAL 0 /* 000 */ #define ENTRY_PTR_NORMAL 0 /* 000 : If a pointer to an entry with value. */
#define ENTRY_PTR_NO_VALUE 2 /* 010 */ #define ENTRY_PTR_IS_ODD_KEY 1 /* XX1 : If a pointer to odd key address (must be 1). */
#define ENTRY_PTR_IS_EVEN_KEY 2 /* 010 : If a pointer to even key address. (must be 2 or 4). */
#define ENTRY_PTR_NO_VALUE 4 /* 100 : If a pointer to an entry without value. */
/* Returns 1 if the entry pointer is a pointer to a key, rather than to an /* Returns 1 if the entry pointer is a pointer to a key, rather than to an
* allocated entry. Returns 0 otherwise. */ * allocated entry. Returns 0 otherwise. */
static inline int entryIsKey(const dictEntry *de) { static inline int entryIsKey(const dictEntry *de) {
return (uintptr_t)(void *)de & 1; return ((uintptr_t)de & (ENTRY_PTR_IS_ODD_KEY | ENTRY_PTR_IS_EVEN_KEY));
} }
/* Returns 1 if the pointer is actually a pointer to a dictEntry struct. Returns /* Returns 1 if the pointer is actually a pointer to a dictEntry struct. Returns
@ -155,7 +158,6 @@ static inline dictEntry *encodeMaskedPtr(const void *ptr, unsigned int bits) {
} }
static inline void *decodeMaskedPtr(const dictEntry *de) { static inline void *decodeMaskedPtr(const dictEntry *de) {
assert(!entryIsKey(de));
return (void *)((uintptr_t)(void *)de & ~ENTRY_PTR_MASK); return (void *)((uintptr_t)(void *)de & ~ENTRY_PTR_MASK);
} }
@ -275,6 +277,12 @@ int _dictResize(dict *d, unsigned long size, int* malloc_failed)
return DICT_OK; return DICT_OK;
} }
/* Force a full rehashing of the dictionary */
if (d->type->force_full_rehash) {
while (dictRehash(d, 1000)) {
/* Continue rehashing */
}
}
return DICT_OK; return DICT_OK;
} }
@ -326,18 +334,17 @@ static void rehashEntriesInBucketAtIndex(dict *d, uint64_t idx) {
h = idx & DICTHT_SIZE_MASK(d->ht_size_exp[1]); h = idx & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
} }
if (d->type->no_value) { if (d->type->no_value) {
if (d->type->keys_are_odd && !d->ht_table[1][h]) { if (!d->ht_table[1][h]) {
/* Destination bucket is empty and we can store the key /* The destination bucket is empty, allowing the key to be stored
* directly without an allocated entry. Free the old entry * directly without allocating a dictEntry. If an old entry was
* if it's an allocated entry. * previously allocated, free its memory. */
*
* TODO: Add a flag 'keys_are_even' and if set, we can use
* this optimization for these dicts too. We can set the LSB
* bit when stored as a dict entry and clear it again when
* we need the key back. */
assert(entryIsKey(key));
if (!entryIsKey(de)) zfree(decodeMaskedPtr(de)); if (!entryIsKey(de)) zfree(decodeMaskedPtr(de));
de = key;
if (d->type->keys_are_odd)
de = key; /* ENTRY_PTR_IS_ODD_KEY trivially set by the odd key. */
else
de = encodeMaskedPtr(key, ENTRY_PTR_IS_EVEN_KEY);
} else if (entryIsKey(de)) { } else if (entryIsKey(de)) {
/* We don't have an allocated entry but we need one. */ /* We don't have an allocated entry but we need one. */
de = createEntryNoValue(key, d->ht_table[1][h]); de = createEntryNoValue(key, d->ht_table[1][h]);
@ -509,6 +516,39 @@ dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
return dictInsertAtPosition(d, key, position); return dictInsertAtPosition(d, key, position);
} }
/* Low-level add function for non-existing keys:
* This function adds a new entry to the dictionary, assuming the key does not
* already exist.
* Parameters:
* - `dict *d`: Pointer to the dictionary structure.
* - `void *key`: Pointer to the key being added.
* - `const uint64_t hash`: hash of the key being added.
* Guarantees:
* - The key is assumed to be non-existing.
* Note:
* Ensure that the key's uniqueness is managed externally before calling this function. */
dictEntry *dictAddNonExistsByHash(dict *d, void *key, const uint64_t hash) {
/* Get the position for the new key, it should never be NULL. */
unsigned long idx, table;
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
/* Rehash the hash table if needed */
_dictRehashStepIfNeeded(d,idx);
/* Expand the hash table if needed */
_dictExpandIfNeeded(d);
table = dictIsRehashing(d) ? 1 : 0;
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
void *position = &d->ht_table[table][idx];
assert(position!=NULL);
/* Dup the key if necessary. */
if (d->type->keyDup) key = d->type->keyDup(d, key);
return dictInsertAtPosition(d, key, position);
}
/* Adds a key in the dict's hashtable at the position returned by a preceding /* Adds a key in the dict's hashtable at the position returned by a preceding
* call to dictFindPositionForInsert. This is a low level function which allows * call to dictFindPositionForInsert. This is a low level function which allows
* splitting dictAddRaw in two parts. Normally, dictAddRaw or dictAdd should be * splitting dictAddRaw in two parts. Normally, dictAddRaw or dictAdd should be
@ -522,16 +562,17 @@ dictEntry *dictInsertAtPosition(dict *d, void *key, void *position) {
assert(bucket >= &d->ht_table[htidx][0] && assert(bucket >= &d->ht_table[htidx][0] &&
bucket <= &d->ht_table[htidx][DICTHT_SIZE_MASK(d->ht_size_exp[htidx])]); bucket <= &d->ht_table[htidx][DICTHT_SIZE_MASK(d->ht_size_exp[htidx])]);
if (d->type->no_value) { if (d->type->no_value) {
if (d->type->keys_are_odd && !*bucket) { if (!*bucket) {
/* We can store the key directly in the destination bucket without the /* We can store the key directly in the destination bucket without
* allocated entry. * allocating dictEntry.
* */
* TODO: Add a flag 'keys_are_even' and if set, we can use this if (d->type->keys_are_odd) {
* optimization for these dicts too. We can set the LSB bit when entry = key;
* stored as a dict entry and clear it again when we need the key assert(entryIsKey(entry));
* back. */ /* The flag ENTRY_PTR_IS_ODD_KEY (=0x1) is already aligned with LSB bit */
entry = key; } else {
assert(entryIsKey(entry)); entry = encodeMaskedPtr(key, ENTRY_PTR_IS_EVEN_KEY);
}
} else { } else {
/* Allocate an entry without value. */ /* Allocate an entry without value. */
entry = createEntryNoValue(key, *bucket); entry = createEntryNoValue(key, *bucket);
@ -608,17 +649,8 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
h = dictHashKey(d, key, d->useStoredKeyApi); h = dictHashKey(d, key, d->useStoredKeyApi);
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[0]); idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
if (dictIsRehashing(d)) { /* Rehash the hash table if needed */
if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) { _dictRehashStepIfNeeded(d,idx);
/* If we have a valid hash entry at `idx` in ht0, we perform
* rehash on the bucket at `idx` (being more CPU cache friendly) */
_dictBucketRehash(d, idx);
} else {
/* If the hash entry is not in ht0, we rehash the buckets based
* on the rehashidx (not CPU cache friendly). */
_dictRehashStep(d);
}
}
keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d); keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d);
@ -697,8 +729,9 @@ int _dictClear(dict *d, int htidx, void(callback)(dict*)) {
/* Free all the elements */ /* Free all the elements */
for (i = 0; i < DICTHT_SIZE(d->ht_size_exp[htidx]) && d->ht_used[htidx] > 0; i++) { for (i = 0; i < DICTHT_SIZE(d->ht_size_exp[htidx]) && d->ht_used[htidx] > 0; i++) {
dictEntry *he, *nextHe; dictEntry *he, *nextHe;
/* Callback will be called once for every 65535 deletions. Beware,
if (callback && (i & 65535) == 0) callback(d); * if dict has less than 65535 items, it will not be called at all.*/
if (callback && i != 0 && (i & 65535) == 0) callback(d);
if ((he = d->ht_table[htidx][i]) == NULL) continue; if ((he = d->ht_table[htidx][i]) == NULL) continue;
while(he) { while(he) {
@ -733,44 +766,49 @@ void dictRelease(dict *d)
zfree(d); zfree(d);
} }
dictEntry *dictFind(dict *d, const void *key) dictEntry *dictFindByHash(dict *d, const void *key, const uint64_t hash) {
{
dictEntry *he; dictEntry *he;
uint64_t h, idx, table; uint64_t idx, table;
if (dictSize(d) == 0) return NULL; /* dict is empty */ if (dictSize(d) == 0) return NULL; /* dict is empty */
h = dictHashKey(d, key, d->useStoredKeyApi); idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d); keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d);
if (dictIsRehashing(d)) { /* Rehash the hash table if needed */
if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) { _dictRehashStepIfNeeded(d,idx);
/* If we have a valid hash entry at `idx` in ht0, we perform
* rehash on the bucket at `idx` (being more CPU cache friendly) */
_dictBucketRehash(d, idx);
} else {
/* If the hash entry is not in ht0, we rehash the buckets based
* on the rehashidx (not CPU cache friendly). */
_dictRehashStep(d);
}
}
for (table = 0; table <= 1; table++) { for (table = 0; table <= 1; table++) {
if (table == 0 && (long)idx < d->rehashidx) continue; if (table == 0 && (long)idx < d->rehashidx) continue;
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]); idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
/* Prefetch the bucket at the calculated index */
redis_prefetch_read(&d->ht_table[table][idx]);
he = d->ht_table[table][idx]; he = d->ht_table[table][idx];
while(he) { while(he) {
void *he_key = dictGetKey(he); void *he_key = dictGetKey(he);
/* Prefetch the next entry to improve cache efficiency */
redis_prefetch_read(dictGetNext(he));
if (key == he_key || cmpFunc(d, key, he_key)) if (key == he_key || cmpFunc(d, key, he_key))
return he; return he;
he = dictGetNext(he); he = dictGetNext(he);
} }
if (!dictIsRehashing(d)) return NULL; /* Use unlikely to optimize branch prediction for the common case */
if (unlikely(!dictIsRehashing(d))) return NULL;
} }
return NULL; return NULL;
} }
dictEntry *dictFind(dict *d, const void *key)
{
if (dictSize(d) == 0) return NULL; /* dict is empty */
const uint64_t hash = dictHashKey(d, key, d->useStoredKeyApi);
return dictFindByHash(d,key,hash);
}
void *dictFetchValue(dict *d, const void *key) { void *dictFetchValue(dict *d, const void *key) {
dictEntry *he; dictEntry *he;
@ -877,7 +915,10 @@ double dictIncrDoubleVal(dictEntry *de, double val) {
} }
void *dictGetKey(const dictEntry *de) { void *dictGetKey(const dictEntry *de) {
if (entryIsKey(de)) return (void*)de; /* if entryIsKey() */
if ((uintptr_t)de & ENTRY_PTR_IS_ODD_KEY) return (void *) de;
if ((uintptr_t)de & ENTRY_PTR_IS_EVEN_KEY) return decodeMaskedPtr(de);
/* Regular entry */
if (entryIsNoValue(de)) return decodeEntryNoValue(de)->key; if (entryIsNoValue(de)) return decodeEntryNoValue(de)->key;
return de->key; return de->key;
} }
@ -1556,6 +1597,21 @@ static void _dictShrinkIfNeeded(dict *d)
dictShrinkIfNeeded(d); dictShrinkIfNeeded(d);
} }
static void _dictRehashStepIfNeeded(dict *d, uint64_t visitedIdx) {
if ((!dictIsRehashing(d)) || (d->pauserehash != 0))
return;
/* rehashing not in progress if rehashidx == -1 */
if ((long)visitedIdx >= d->rehashidx && d->ht_table[0][visitedIdx]) {
/* If we have a valid hash entry at `idx` in ht0, we perform
* rehash on the bucket at `idx` (being more CPU cache friendly) */
_dictBucketRehash(d, visitedIdx);
} else {
/* If the hash entry is not in ht0, we rehash the buckets based
* on the rehashidx (not CPU cache friendly). */
dictRehash(d,1);
}
}
/* Our hash table capability is a power of two */ /* Our hash table capability is a power of two */
static signed char _dictNextExp(unsigned long size) static signed char _dictNextExp(unsigned long size)
{ {
@ -1576,17 +1632,8 @@ void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing)
if (existing) *existing = NULL; if (existing) *existing = NULL;
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]); idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
if (dictIsRehashing(d)) { /* Rehash the hash table if needed */
if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) { _dictRehashStepIfNeeded(d,idx);
/* If we have a valid hash entry at `idx` in ht0, we perform
* rehash on the bucket at `idx` (being more CPU cache friendly) */
_dictBucketRehash(d, idx);
} else {
/* If the hash entry is not in ht0, we rehash the buckets based
* on the rehashidx (not CPU cache friendly). */
_dictRehashStep(d);
}
}
/* Expand the hash table if needed */ /* Expand the hash table if needed */
_dictExpandIfNeeded(d); _dictExpandIfNeeded(d);
@ -1614,6 +1661,7 @@ void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing)
return bucket; return bucket;
} }
void dictEmpty(dict *d, void(callback)(dict*)) { void dictEmpty(dict *d, void(callback)(dict*)) {
/* Someone may be monitoring a dict that started rehashing, before /* Someone may be monitoring a dict that started rehashing, before
* destroying the dict fake completion. */ * destroying the dict fake completion. */
@ -1639,7 +1687,7 @@ uint64_t dictGetHash(dict *d, const void *key) {
* the hash value should be provided using dictGetHash. * the hash value should be provided using dictGetHash.
* no string / key comparison is performed. * no string / key comparison is performed.
* return value is a pointer to the dictEntry if found, or NULL if not found. */ * return value is a pointer to the dictEntry if found, or NULL if not found. */
dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) { dictEntry *dictFindByHashAndPtr(dict *d, const void *oldptr, const uint64_t hash) {
dictEntry *he; dictEntry *he;
unsigned long idx, table; unsigned long idx, table;
@ -1821,6 +1869,32 @@ char *stringFromLongLong(long long value) {
return s; return s;
} }
char *stringFromSubstring(void) {
#define LARGE_STRING_SIZE 10000
#define MIN_STRING_SIZE 100
#define MAX_STRING_SIZE 500
static char largeString[LARGE_STRING_SIZE + 1];
static int init = 0;
if (init == 0) {
/* Generate a large string */
for (size_t i = 0; i < LARGE_STRING_SIZE; i++) {
/* Random printable ASCII character (33 to 126) */
largeString[i] = 33 + (rand() % 94);
}
/* Null-terminate the large string */
largeString[LARGE_STRING_SIZE] = '\0';
init = 1;
}
/* Randomly choose a size between minSize and maxSize */
size_t substringSize = MIN_STRING_SIZE + (rand() % (MAX_STRING_SIZE - MIN_STRING_SIZE + 1));
size_t startIndex = rand() % (LARGE_STRING_SIZE - substringSize + 1);
/* Allocate memory for the substring (+1 for null terminator) */
char *s = zmalloc(substringSize + 1);
memcpy(s, largeString + startIndex, substringSize); // Copy the substring
s[substringSize] = '\0'; // Null-terminate the string
return s;
}
dictType BenchmarkDictType = { dictType BenchmarkDictType = {
hashCallback, hashCallback,
NULL, NULL,
@ -1842,7 +1916,9 @@ int dictTest(int argc, char **argv, int flags) {
long j; long j;
long long start, elapsed; long long start, elapsed;
int retval; int retval;
dict *dict = dictCreate(&BenchmarkDictType); dict *d = dictCreate(&BenchmarkDictType);
dictEntry* de = NULL;
dictEntry* existing = NULL;
long count = 0; long count = 0;
unsigned long new_dict_size, current_dict_used, remain_keys; unsigned long new_dict_size, current_dict_used, remain_keys;
int accurate = (flags & REDIS_TEST_ACCURATE); int accurate = (flags & REDIS_TEST_ACCURATE);
@ -1860,12 +1936,12 @@ int dictTest(int argc, char **argv, int flags) {
TEST("Add 16 keys and verify dict resize is ok") { TEST("Add 16 keys and verify dict resize is ok") {
dictSetResizeEnabled(DICT_RESIZE_ENABLE); dictSetResizeEnabled(DICT_RESIZE_ENABLE);
for (j = 0; j < 16; j++) { for (j = 0; j < 16; j++) {
retval = dictAdd(dict,stringFromLongLong(j),(void*)j); retval = dictAdd(d,stringFromLongLong(j),(void*)j);
assert(retval == DICT_OK); assert(retval == DICT_OK);
} }
while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000); while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000);
assert(dictSize(dict) == 16); assert(dictSize(d) == 16);
assert(dictBuckets(dict) == 16); assert(dictBuckets(d) == 16);
} }
TEST("Use DICT_RESIZE_AVOID to disable the dict resize and pad to (dict_force_resize_ratio * 16)") { TEST("Use DICT_RESIZE_AVOID to disable the dict resize and pad to (dict_force_resize_ratio * 16)") {
@ -1874,132 +1950,218 @@ int dictTest(int argc, char **argv, int flags) {
* dict_force_resize_ratio in next test. */ * dict_force_resize_ratio in next test. */
dictSetResizeEnabled(DICT_RESIZE_AVOID); dictSetResizeEnabled(DICT_RESIZE_AVOID);
for (j = 16; j < (long)dict_force_resize_ratio * 16; j++) { for (j = 16; j < (long)dict_force_resize_ratio * 16; j++) {
retval = dictAdd(dict,stringFromLongLong(j),(void*)j); retval = dictAdd(d,stringFromLongLong(j),(void*)j);
assert(retval == DICT_OK); assert(retval == DICT_OK);
} }
current_dict_used = dict_force_resize_ratio * 16; current_dict_used = dict_force_resize_ratio * 16;
assert(dictSize(dict) == current_dict_used); assert(dictSize(d) == current_dict_used);
assert(dictBuckets(dict) == 16); assert(dictBuckets(d) == 16);
} }
TEST("Add one more key, trigger the dict resize") { TEST("Add one more key, trigger the dict resize") {
retval = dictAdd(dict,stringFromLongLong(current_dict_used),(void*)(current_dict_used)); retval = dictAdd(d,stringFromLongLong(current_dict_used),(void*)(current_dict_used));
assert(retval == DICT_OK); assert(retval == DICT_OK);
current_dict_used++; current_dict_used++;
new_dict_size = 1UL << _dictNextExp(current_dict_used); new_dict_size = 1UL << _dictNextExp(current_dict_used);
assert(dictSize(dict) == current_dict_used); assert(dictSize(d) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == 16); assert(DICTHT_SIZE(d->ht_size_exp[0]) == 16);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == new_dict_size); assert(DICTHT_SIZE(d->ht_size_exp[1]) == new_dict_size);
/* Wait for rehashing. */ /* Wait for rehashing. */
dictSetResizeEnabled(DICT_RESIZE_ENABLE); dictSetResizeEnabled(DICT_RESIZE_ENABLE);
while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000); while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000);
assert(dictSize(dict) == current_dict_used); assert(dictSize(d) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size); assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0); assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0);
} }
TEST("Delete keys until we can trigger shrink in next test") { TEST("Delete keys until we can trigger shrink in next test") {
/* Delete keys until we can satisfy (1 / HASHTABLE_MIN_FILL) in the next test. */ /* Delete keys until we can satisfy (1 / HASHTABLE_MIN_FILL) in the next test. */
for (j = new_dict_size / HASHTABLE_MIN_FILL + 1; j < (long)current_dict_used; j++) { for (j = new_dict_size / HASHTABLE_MIN_FILL + 1; j < (long)current_dict_used; j++) {
char *key = stringFromLongLong(j); char *key = stringFromLongLong(j);
retval = dictDelete(dict, key); retval = dictDelete(d, key);
zfree(key); zfree(key);
assert(retval == DICT_OK); assert(retval == DICT_OK);
} }
current_dict_used = new_dict_size / HASHTABLE_MIN_FILL + 1; current_dict_used = new_dict_size / HASHTABLE_MIN_FILL + 1;
assert(dictSize(dict) == current_dict_used); assert(dictSize(d) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size); assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0); assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0);
} }
TEST("Delete one more key, trigger the dict resize") { TEST("Delete one more key, trigger the dict resize") {
current_dict_used--; current_dict_used--;
char *key = stringFromLongLong(current_dict_used); char *key = stringFromLongLong(current_dict_used);
retval = dictDelete(dict, key); retval = dictDelete(d, key);
zfree(key); zfree(key);
unsigned long oldDictSize = new_dict_size; unsigned long oldDictSize = new_dict_size;
new_dict_size = 1UL << _dictNextExp(current_dict_used); new_dict_size = 1UL << _dictNextExp(current_dict_used);
assert(retval == DICT_OK); assert(retval == DICT_OK);
assert(dictSize(dict) == current_dict_used); assert(dictSize(d) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == oldDictSize); assert(DICTHT_SIZE(d->ht_size_exp[0]) == oldDictSize);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == new_dict_size); assert(DICTHT_SIZE(d->ht_size_exp[1]) == new_dict_size);
/* Wait for rehashing. */ /* Wait for rehashing. */
while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000); while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000);
assert(dictSize(dict) == current_dict_used); assert(dictSize(d) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size); assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0); assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0);
} }
TEST("Empty the dictionary and add 128 keys") { TEST("Empty the dictionary and add 128 keys") {
dictEmpty(dict, NULL); dictEmpty(d, NULL);
for (j = 0; j < 128; j++) { for (j = 0; j < 128; j++) {
retval = dictAdd(dict,stringFromLongLong(j),(void*)j); retval = dictAdd(d,stringFromLongLong(j),(void*)j);
assert(retval == DICT_OK); assert(retval == DICT_OK);
} }
while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000); while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000);
assert(dictSize(dict) == 128); assert(dictSize(d) == 128);
assert(dictBuckets(dict) == 128); assert(dictBuckets(d) == 128);
} }
TEST("Use DICT_RESIZE_AVOID to disable the dict resize and reduce to 3") { TEST("Use DICT_RESIZE_AVOID to disable the dict resize and reduce to 3") {
/* Use DICT_RESIZE_AVOID to disable the dict reset, and reduce /* Use DICT_RESIZE_AVOID to disable the dict reset, and reduce
* the number of keys until we can trigger shrinking in next test. */ * the number of keys until we can trigger shrinking in next test. */
dictSetResizeEnabled(DICT_RESIZE_AVOID); dictSetResizeEnabled(DICT_RESIZE_AVOID);
remain_keys = DICTHT_SIZE(dict->ht_size_exp[0]) / (HASHTABLE_MIN_FILL * dict_force_resize_ratio) + 1; remain_keys = DICTHT_SIZE(d->ht_size_exp[0]) / (HASHTABLE_MIN_FILL * dict_force_resize_ratio) + 1;
for (j = remain_keys; j < 128; j++) { for (j = remain_keys; j < 128; j++) {
char *key = stringFromLongLong(j); char *key = stringFromLongLong(j);
retval = dictDelete(dict, key); retval = dictDelete(d, key);
zfree(key); zfree(key);
assert(retval == DICT_OK); assert(retval == DICT_OK);
} }
current_dict_used = remain_keys; current_dict_used = remain_keys;
assert(dictSize(dict) == remain_keys); assert(dictSize(d) == remain_keys);
assert(dictBuckets(dict) == 128); assert(dictBuckets(d) == 128);
} }
TEST("Delete one more key, trigger the dict resize") { TEST("Delete one more key, trigger the dict resize") {
current_dict_used--; current_dict_used--;
char *key = stringFromLongLong(current_dict_used); char *key = stringFromLongLong(current_dict_used);
retval = dictDelete(dict, key); retval = dictDelete(d, key);
zfree(key); zfree(key);
new_dict_size = 1UL << _dictNextExp(current_dict_used); new_dict_size = 1UL << _dictNextExp(current_dict_used);
assert(retval == DICT_OK); assert(retval == DICT_OK);
assert(dictSize(dict) == current_dict_used); assert(dictSize(d) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == 128); assert(DICTHT_SIZE(d->ht_size_exp[0]) == 128);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == new_dict_size); assert(DICTHT_SIZE(d->ht_size_exp[1]) == new_dict_size);
/* Wait for rehashing. */ /* Wait for rehashing. */
dictSetResizeEnabled(DICT_RESIZE_ENABLE); dictSetResizeEnabled(DICT_RESIZE_ENABLE);
while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000); while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000);
assert(dictSize(dict) == current_dict_used); assert(dictSize(d) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size); assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0); assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0);
} }
TEST("Restore to original state") { TEST("Restore to original state") {
dictEmpty(dict, NULL); dictEmpty(d, NULL);
dictSetResizeEnabled(DICT_RESIZE_ENABLE); dictSetResizeEnabled(DICT_RESIZE_ENABLE);
} }
srand(12345);
start_benchmark();
for (j = 0; j < count; j++) {
/* Create a dynamically allocated substring */
char *key = stringFromSubstring();
/* Insert the range directly from the large string */
de = dictAddRaw(d, key, &existing);
assert(de != NULL || existing != NULL);
/* If key already exists NULL is returned so we need to free the temp key string */
if (de == NULL) zfree(key);
}
end_benchmark("Inserting random substrings (100-500B) from large string with symbols");
assert((long)dictSize(d) <= count);
dictEmpty(d, NULL);
start_benchmark(); start_benchmark();
for (j = 0; j < count; j++) { for (j = 0; j < count; j++) {
retval = dictAdd(dict,stringFromLongLong(j),(void*)j); retval = dictAdd(d,stringFromLongLong(j),(void*)j);
assert(retval == DICT_OK); assert(retval == DICT_OK);
} }
end_benchmark("Inserting"); end_benchmark("Inserting via dictAdd() non existing");
assert((long)dictSize(dict) == count); assert((long)dictSize(d) == count);
dictEmpty(d, NULL);
start_benchmark();
for (j = 0; j < count; j++) {
de = dictAddRaw(d,stringFromLongLong(j),NULL);
assert(de != NULL);
}
end_benchmark("Inserting via dictAddRaw() non existing");
assert((long)dictSize(d) == count);
start_benchmark();
for (j = 0; j < count; j++) {
void *key = stringFromLongLong(j);
de = dictAddRaw(d,key,&existing);
assert(existing != NULL);
zfree(key);
}
end_benchmark("Inserting via dictAddRaw() existing (no insertion)");
assert((long)dictSize(d) == count);
dictEmpty(d, NULL);
start_benchmark();
for (j = 0; j < count; j++) {
void *key = stringFromLongLong(j);
const uint64_t hash = dictGetHash(d, key);
de = dictAddNonExistsByHash(d,key,hash);
assert(de != NULL);
}
end_benchmark("Inserting via dictAddNonExistsByHash() non existing");
assert((long)dictSize(d) == count);
/* Wait for rehashing. */ /* Wait for rehashing. */
while (dictIsRehashing(dict)) { while (dictIsRehashing(d)) {
dictRehashMicroseconds(dict,100*1000); dictRehashMicroseconds(d,100*1000);
}
dictEmpty(d, NULL);
start_benchmark();
for (j = 0; j < count; j++) {
/* Create a key */
void *key = stringFromLongLong(j);
/* Check if the key exists */
dictEntry *entry = dictFind(d, key);
assert(entry == NULL);
/* Add the key */
dictEntry *de = dictAddRaw(d, key, NULL);
assert(de != NULL);
}
end_benchmark("Find() and inserting via dictFind()+dictAddRaw() non existing");
dictEmpty(d, NULL);
start_benchmark();
for (j = 0; j < count; j++) {
/* Create a key */
void *key = stringFromLongLong(j);
uint64_t hash = dictGetHash(d, key);
/* Check if the key exists */
dictEntry *entry = dictFindByHash(d, key, hash);
assert(entry == NULL);
de = dictAddNonExistsByHash(d, key, hash);
assert(de != NULL);
}
end_benchmark("Find() and inserting via dictGetHash()+dictFindByHash()+dictAddNonExistsByHash() non existing");
assert((long)dictSize(d) == count);
/* Wait for rehashing. */
while (dictIsRehashing(d)) {
dictRehashMicroseconds(d,100*1000);
} }
start_benchmark(); start_benchmark();
for (j = 0; j < count; j++) { for (j = 0; j < count; j++) {
char *key = stringFromLongLong(j); char *key = stringFromLongLong(j);
dictEntry *de = dictFind(dict,key); dictEntry *de = dictFind(d,key);
assert(de != NULL); assert(de != NULL);
zfree(key); zfree(key);
} }
@ -2008,7 +2170,7 @@ int dictTest(int argc, char **argv, int flags) {
start_benchmark(); start_benchmark();
for (j = 0; j < count; j++) { for (j = 0; j < count; j++) {
char *key = stringFromLongLong(j); char *key = stringFromLongLong(j);
dictEntry *de = dictFind(dict,key); dictEntry *de = dictFind(d,key);
assert(de != NULL); assert(de != NULL);
zfree(key); zfree(key);
} }
@ -2017,7 +2179,7 @@ int dictTest(int argc, char **argv, int flags) {
start_benchmark(); start_benchmark();
for (j = 0; j < count; j++) { for (j = 0; j < count; j++) {
char *key = stringFromLongLong(rand() % count); char *key = stringFromLongLong(rand() % count);
dictEntry *de = dictFind(dict,key); dictEntry *de = dictFind(d,key);
assert(de != NULL); assert(de != NULL);
zfree(key); zfree(key);
} }
@ -2025,7 +2187,7 @@ int dictTest(int argc, char **argv, int flags) {
start_benchmark(); start_benchmark();
for (j = 0; j < count; j++) { for (j = 0; j < count; j++) {
dictEntry *de = dictGetRandomKey(dict); dictEntry *de = dictGetRandomKey(d);
assert(de != NULL); assert(de != NULL);
} }
end_benchmark("Accessing random keys"); end_benchmark("Accessing random keys");
@ -2034,7 +2196,7 @@ int dictTest(int argc, char **argv, int flags) {
for (j = 0; j < count; j++) { for (j = 0; j < count; j++) {
char *key = stringFromLongLong(rand() % count); char *key = stringFromLongLong(rand() % count);
key[0] = 'X'; key[0] = 'X';
dictEntry *de = dictFind(dict,key); dictEntry *de = dictFind(d,key);
assert(de == NULL); assert(de == NULL);
zfree(key); zfree(key);
} }
@ -2043,14 +2205,52 @@ int dictTest(int argc, char **argv, int flags) {
start_benchmark(); start_benchmark();
for (j = 0; j < count; j++) { for (j = 0; j < count; j++) {
char *key = stringFromLongLong(j); char *key = stringFromLongLong(j);
retval = dictDelete(dict,key); retval = dictDelete(d,key);
assert(retval == DICT_OK); assert(retval == DICT_OK);
key[0] += 17; /* Change first number to letter. */ key[0] += 17; /* Change first number to letter. */
retval = dictAdd(dict,key,(void*)j); retval = dictAdd(d,key,(void*)j);
assert(retval == DICT_OK); assert(retval == DICT_OK);
} }
end_benchmark("Removing and adding"); end_benchmark("Removing and adding");
dictRelease(dict); dictRelease(d);
TEST("Use dict without values (no_value=1)") {
dictType dt = BenchmarkDictType;
dt.no_value = 1;
/* Allocate array of size count and fill it with keys (stringFromLongLong(j) */
char **lookupKeys = zmalloc(sizeof(char*) * count);
for (long j = 0; j < count; j++)
lookupKeys[j] = stringFromLongLong(j);
/* Add keys without values. */
dict *d = dictCreate(&dt);
for (j = 0; j < count; j++) {
retval = dictAdd(d,lookupKeys[j],NULL);
assert(retval == DICT_OK);
}
/* Now, we should be able to find the keys. */
for (j = 0; j < count; j++) {
dictEntry *de = dictFind(d,lookupKeys[j]);
assert(de != NULL);
}
/* Find non exists keys. */
for (j = 0; j < count; j++) {
/* Temporarily override first char of key */
char tmp = lookupKeys[j][0];
lookupKeys[j][0] = 'X';
dictEntry *de = dictFind(d,lookupKeys[j]);
lookupKeys[j][0] = tmp;
assert(de == NULL);
}
dictRelease(d);
zfree(lookupKeys);
}
return 0; return 0;
} }
#endif #endif

View File

@ -53,15 +53,19 @@ typedef struct dictType {
/* Flags */ /* Flags */
/* The 'no_value' flag, if set, indicates that values are not used, i.e. the /* The 'no_value' flag, if set, indicates that values are not used, i.e. the
* dict is a set. When this flag is set, it's not possible to access the * dict is a set. When this flag is set, it's not possible to access the
* value of a dictEntry and it's also impossible to use dictSetKey(). Entry * value of a dictEntry and it's also impossible to use dictSetKey(). It
* metadata can also not be used. */ * enables an optimization to store a key directly without an allocating
* dictEntry in between, if it is the only key in the bucket. */
unsigned int no_value:1; unsigned int no_value:1;
/* If no_value = 1 and all keys are odd (LSB=1), setting keys_are_odd = 1 /* This flag is required for `no_value` optimization since the optimization
* enables one more optimization: to store a key without an allocated * reuses LSB bits as metadata */
* dictEntry. */
unsigned int keys_are_odd:1; unsigned int keys_are_odd:1;
/* TODO: Add a 'keys_are_even' flag and use a similar optimization if that /* TODO: Add a 'keys_are_even' flag and use a similar optimization if that
* flag is set. */ * flag is set. */
/* Ensures that the entire hash table is rehashed at once if set. */
unsigned int force_full_rehash:1;
/* Sometimes we want the ability to store a key in a given way inside the hash /* Sometimes we want the ability to store a key in a given way inside the hash
* function, and lookup it in some other way without resorting to any kind of * function, and lookup it in some other way without resorting to any kind of
* conversion. For instance the key may be stored as a structure also * conversion. For instance the key may be stored as a structure also
@ -196,6 +200,7 @@ int dictTryExpand(dict *d, unsigned long size);
int dictShrink(dict *d, unsigned long size); int dictShrink(dict *d, unsigned long size);
int dictAdd(dict *d, void *key, void *val); int dictAdd(dict *d, void *key, void *val);
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing); dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing);
dictEntry *dictAddNonExistsByHash(dict *d, void *key, const uint64_t hash);
void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing); void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing);
dictEntry *dictInsertAtPosition(dict *d, void *key, void *position); dictEntry *dictInsertAtPosition(dict *d, void *key, void *position);
dictEntry *dictAddOrFind(dict *d, void *key); dictEntry *dictAddOrFind(dict *d, void *key);
@ -207,6 +212,8 @@ dictEntry *dictTwoPhaseUnlinkFind(dict *d, const void *key, dictEntry ***plink,
void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table_index); void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table_index);
void dictRelease(dict *d); void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key); dictEntry * dictFind(dict *d, const void *key);
dictEntry *dictFindByHash(dict *d, const void *key, const uint64_t hash);
dictEntry *dictFindByHashAndPtr(dict *d, const void *oldptr, const uint64_t hash);
void *dictFetchValue(dict *d, const void *key); void *dictFetchValue(dict *d, const void *key);
int dictShrinkIfNeeded(dict *d); int dictShrinkIfNeeded(dict *d);
int dictExpandIfNeeded(dict *d); int dictExpandIfNeeded(dict *d);
@ -249,7 +256,6 @@ uint8_t *dictGetHashFunctionSeed(void);
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata); unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata);
unsigned long dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata); unsigned long dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata);
uint64_t dictGetHash(dict *d, const void *key); uint64_t dictGetHash(dict *d, const void *key);
dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash);
void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size); void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size);
size_t dictGetStatsMsg(char *buf, size_t bufsize, dictStats *stats, int full); size_t dictGetStatsMsg(char *buf, size_t bufsize, dictStats *stats, int full);

View File

@ -93,7 +93,7 @@ struct ldbState {
* bodies in order to obtain the Lua function name, and in the implementation * bodies in order to obtain the Lua function name, and in the implementation
* of redis.sha1(). * of redis.sha1().
* *
* 'digest' should point to a 41 bytes buffer: 40 for SHA1 converted into an * 'digest' should point to a 41 bytes buffer: 40 for SHA1 converted into a
* hexadecimal number, plus 1 byte for null term. */ * hexadecimal number, plus 1 byte for null term. */
void sha1hex(char *digest, char *script, size_t len) { void sha1hex(char *digest, char *script, size_t len) {
SHA1_CTX ctx; SHA1_CTX ctx;
@ -259,12 +259,17 @@ void scriptingInit(int setup) {
void freeLuaScriptsSync(dict *lua_scripts, list *lua_scripts_lru_list, lua_State *lua) { void freeLuaScriptsSync(dict *lua_scripts, list *lua_scripts_lru_list, lua_State *lua) {
dictRelease(lua_scripts); dictRelease(lua_scripts);
listRelease(lua_scripts_lru_list); listRelease(lua_scripts_lru_list);
lua_close(lua);
#if defined(USE_JEMALLOC) #if defined(USE_JEMALLOC)
/* When lua is closed, destroy the previously used private tcache. */ /* When lua is closed, destroy the previously used private tcache. */
void *ud = (global_State*)G(lua)->ud; void *ud = (global_State*)G(lua)->ud;
unsigned int lua_tcache = (unsigned int)(uintptr_t)ud; unsigned int lua_tcache = (unsigned int)(uintptr_t)ud;
#endif
lua_gc(lua, LUA_GCCOLLECT, 0);
lua_close(lua);
#if defined(USE_JEMALLOC)
je_mallctl("tcache.destroy", NULL, NULL, (void *)&lua_tcache, sizeof(unsigned int)); je_mallctl("tcache.destroy", NULL, NULL, (void *)&lua_tcache, sizeof(unsigned int));
#endif #endif
} }
@ -730,7 +735,7 @@ NULL
} }
} }
unsigned long evalMemory(void) { unsigned long evalScriptsMemoryVM(void) {
return luaMemory(lctx.lua); return luaMemory(lctx.lua);
} }
@ -738,7 +743,7 @@ dict* evalScriptsDict(void) {
return lctx.lua_scripts; return lctx.lua_scripts;
} }
unsigned long evalScriptsMemory(void) { unsigned long evalScriptsMemoryEngine(void) {
return lctx.lua_scripts_mem + return lctx.lua_scripts_mem +
dictMemUsage(lctx.lua_scripts) + dictMemUsage(lctx.lua_scripts) +
dictSize(lctx.lua_scripts) * sizeof(luaScript) + dictSize(lctx.lua_scripts) * sizeof(luaScript) +
@ -754,7 +759,7 @@ void ldbInit(void) {
ldb.conn = NULL; ldb.conn = NULL;
ldb.active = 0; ldb.active = 0;
ldb.logs = listCreate(); ldb.logs = listCreate();
listSetFreeMethod(ldb.logs,(void (*)(void*))sdsfree); listSetFreeMethod(ldb.logs, sdsfreegeneric);
ldb.children = listCreate(); ldb.children = listCreate();
ldb.src = NULL; ldb.src = NULL;
ldb.lines = 0; ldb.lines = 0;

97
src/eventnotifier.c Normal file
View File

@ -0,0 +1,97 @@
/* eventnotifier.c -- An event notifier based on eventfd or pipe.
*
* Copyright (c) 2024-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
*/
#include "eventnotifier.h"
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#ifdef HAVE_EVENT_FD
#include <sys/eventfd.h>
#endif
#include "anet.h"
#include "zmalloc.h"
eventNotifier* createEventNotifier(void) {
eventNotifier *en = zmalloc(sizeof(eventNotifier));
if (!en) return NULL;
#ifdef HAVE_EVENT_FD
if ((en->efd = eventfd(0, EFD_NONBLOCK| EFD_CLOEXEC)) != -1) {
return en;
}
#else
if (anetPipe(en->pipefd, O_CLOEXEC|O_NONBLOCK, O_CLOEXEC|O_NONBLOCK) != -1) {
return en;
}
#endif
/* Clean up if error. */
zfree(en);
return NULL;
}
int getReadEventFd(struct eventNotifier *en) {
#ifdef HAVE_EVENT_FD
return en->efd;
#else
return en->pipefd[0];
#endif
}
int getWriteEventFd(struct eventNotifier *en) {
#ifdef HAVE_EVENT_FD
return en->efd;
#else
return en->pipefd[1];
#endif
}
int triggerEventNotifier(struct eventNotifier *en) {
#ifdef HAVE_EVENT_FD
uint64_t u = 1;
if (write(en->efd, &u, sizeof(uint64_t)) == -1) {
return EN_ERR;
}
#else
char buf[1] = {'R'};
if (write(en->pipefd[1], buf, 1) == -1) {
return EN_ERR;
}
#endif
return EN_OK;
}
int handleEventNotifier(struct eventNotifier *en) {
#ifdef HAVE_EVENT_FD
uint64_t u;
if (read(en->efd, &u, sizeof(uint64_t)) == -1) {
return EN_ERR;
}
#else
char buf[1];
if (read(en->pipefd[0], buf, 1) == -1) {
return EN_ERR;
}
#endif
return EN_OK;
}
void freeEventNotifier(struct eventNotifier *en) {
#ifdef HAVE_EVENT_FD
close(en->efd);
#else
close(en->pipefd[0]);
close(en->pipefd[1]);
#endif
/* Free memory */
zfree(en);
}

33
src/eventnotifier.h Normal file
View File

@ -0,0 +1,33 @@
/* eventnotifier.h -- An event notifier based on eventfd or pipe.
*
* Copyright (c) 2024-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
*/
#ifndef EVENTNOTIFIER_H
#define EVENTNOTIFIER_H
#include "config.h"
#define EN_OK 0
#define EN_ERR -1
typedef struct eventNotifier {
#ifdef HAVE_EVENT_FD
int efd;
#else
int pipefd[2];
#endif
} eventNotifier;
eventNotifier* createEventNotifier(void);
int getReadEventFd(struct eventNotifier *en);
int getWriteEventFd(struct eventNotifier *en);
int triggerEventNotifier(struct eventNotifier *en);
int handleEventNotifier(struct eventNotifier *en);
void freeEventNotifier(struct eventNotifier *en);
#endif

View File

@ -23,6 +23,9 @@
#include <lua.h> #include <lua.h>
#include <lauxlib.h> #include <lauxlib.h>
#include <lualib.h> #include <lualib.h>
#if defined(USE_JEMALLOC)
#include <lstate.h>
#endif
#define LUA_ENGINE_NAME "LUA" #define LUA_ENGINE_NAME "LUA"
#define REGISTRY_ENGINE_CTX_NAME "__ENGINE_CTX__" #define REGISTRY_ENGINE_CTX_NAME "__ENGINE_CTX__"
@ -189,8 +192,19 @@ static void luaEngineFreeFunction(void *engine_ctx, void *compiled_function) {
static void luaEngineFreeCtx(void *engine_ctx) { static void luaEngineFreeCtx(void *engine_ctx) {
luaEngineCtx *lua_engine_ctx = engine_ctx; luaEngineCtx *lua_engine_ctx = engine_ctx;
#if defined(USE_JEMALLOC)
/* When lua is closed, destroy the previously used private tcache. */
void *ud = (global_State*)G(lua_engine_ctx->lua)->ud;
unsigned int lua_tcache = (unsigned int)(uintptr_t)ud;
#endif
lua_gc(lua_engine_ctx->lua, LUA_GCCOLLECT, 0);
lua_close(lua_engine_ctx->lua); lua_close(lua_engine_ctx->lua);
zfree(lua_engine_ctx); zfree(lua_engine_ctx);
#if defined(USE_JEMALLOC)
je_mallctl("tcache.destroy", NULL, NULL, (void *)&lua_tcache, sizeof(unsigned int));
#endif
} }
static void luaRegisterFunctionArgsInitialize(registerFunctionArgs *register_f_args, static void luaRegisterFunctionArgsInitialize(registerFunctionArgs *register_f_args,

View File

@ -144,6 +144,10 @@ static void engineLibraryFree(functionLibInfo* li) {
zfree(li); zfree(li);
} }
static void engineLibraryFreeGeneric(void *li) {
engineLibraryFree((functionLibInfo *)li);
}
static void engineLibraryDispose(dict *d, void *obj) { static void engineLibraryDispose(dict *d, void *obj) {
UNUSED(d); UNUSED(d);
engineLibraryFree(obj); engineLibraryFree(obj);
@ -338,7 +342,7 @@ static int libraryJoin(functionsLibCtx *functions_lib_ctx_dst, functionsLibCtx *
} else { } else {
if (!old_libraries_list) { if (!old_libraries_list) {
old_libraries_list = listCreate(); old_libraries_list = listCreate();
listSetFreeMethod(old_libraries_list, (void (*)(void*))engineLibraryFree); listSetFreeMethod(old_libraries_list, engineLibraryFreeGeneric);
} }
libraryUnlink(functions_lib_ctx_dst, old_li); libraryUnlink(functions_lib_ctx_dst, old_li);
listAddNodeTail(old_libraries_list, old_li); listAddNodeTail(old_libraries_list, old_li);
@ -1063,7 +1067,7 @@ void functionLoadCommand(client *c) {
} }
/* Return memory usage of all the engines combine */ /* Return memory usage of all the engines combine */
unsigned long functionsMemory(void) { unsigned long functionsMemoryVM(void) {
dictIterator *iter = dictGetIterator(engines); dictIterator *iter = dictGetIterator(engines);
dictEntry *entry = NULL; dictEntry *entry = NULL;
size_t engines_memory = 0; size_t engines_memory = 0;
@ -1078,7 +1082,7 @@ unsigned long functionsMemory(void) {
} }
/* Return memory overhead of all the engines combine */ /* Return memory overhead of all the engines combine */
unsigned long functionsMemoryOverhead(void) { unsigned long functionsMemoryEngine(void) {
size_t memory_overhead = dictMemUsage(engines); size_t memory_overhead = dictMemUsage(engines);
memory_overhead += dictMemUsage(curr_functions_lib_ctx->functions); memory_overhead += dictMemUsage(curr_functions_lib_ctx->functions);
memory_overhead += sizeof(functionsLibCtx); memory_overhead += sizeof(functionsLibCtx);

View File

@ -102,8 +102,8 @@ struct functionLibInfo {
int functionsRegisterEngine(const char *engine_name, engine *engine_ctx); int functionsRegisterEngine(const char *engine_name, engine *engine_ctx);
sds functionsCreateWithLibraryCtx(sds code, int replace, sds* err, functionsLibCtx *lib_ctx, size_t timeout); sds functionsCreateWithLibraryCtx(sds code, int replace, sds* err, functionsLibCtx *lib_ctx, size_t timeout);
unsigned long functionsMemory(void); unsigned long functionsMemoryVM(void);
unsigned long functionsMemoryOverhead(void); unsigned long functionsMemoryEngine(void);
unsigned long functionsNum(void); unsigned long functionsNum(void);
unsigned long functionsLibNum(void); unsigned long functionsLibNum(void);
dict* functionsLibGet(void); dict* functionsLibGet(void);

View File

@ -4,8 +4,13 @@
* Copyright (c) 2014-Present, Redis Ltd. * Copyright (c) 2014-Present, Redis Ltd.
* All rights reserved. * All rights reserved.
* *
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0 * Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1). * (RSALv2) or the Server Side Public License v1 (SSPLv1).
*
* Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
*/ */
#include "server.h" #include "server.h"
@ -13,6 +18,13 @@
#include <stdint.h> #include <stdint.h>
#include <math.h> #include <math.h>
#ifdef HAVE_AVX2
/* Define __MM_MALLOC_H to prevent importing the memory aligned
* allocation functions, which we don't use. */
#define __MM_MALLOC_H
#include <immintrin.h>
#endif
/* The Redis HyperLogLog implementation is based on the following ideas: /* The Redis HyperLogLog implementation is based on the following ideas:
* *
* * The use of a 64 bit hash function as proposed in [1], in order to estimate * * The use of a 64 bit hash function as proposed in [1], in order to estimate
@ -186,6 +198,13 @@ struct hllhdr {
static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected"; static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected";
#ifdef HAVE_AVX2
static int simd_enabled = 1;
#define HLL_USE_AVX2 (simd_enabled && __builtin_cpu_supports("avx2"))
#else
#define HLL_USE_AVX2 0
#endif
/* =========================== Low level bit macros ========================= */ /* =========================== Low level bit macros ========================= */
/* Macros to access the dense representation. /* Macros to access the dense representation.
@ -1041,6 +1060,132 @@ int hllAdd(robj *o, unsigned char *ele, size_t elesize) {
} }
} }
#ifdef HAVE_AVX2
/* A specialized version of hllMergeDense, optimized for default configurations.
*
* Requirements:
* 1) HLL_REGISTERS == 16384 && HLL_BITS == 6
* 2) The CPU supports AVX2 (checked at runtime in hllMergeDense)
*
* reg_raw: pointer to the raw representation array (16384 bytes, one byte per register)
* reg_dense: pointer to the dense representation array (12288 bytes, 6 bits per register)
*/
ATTRIBUTE_TARGET_AVX2
void hllMergeDenseAVX2(uint8_t *reg_raw, const uint8_t *reg_dense) {
const __m256i shuffle = _mm256_setr_epi8( //
4, 5, 6, -1, //
7, 8, 9, -1, //
10, 11, 12, -1, //
13, 14, 15, -1, //
0, 1, 2, -1, //
3, 4, 5, -1, //
6, 7, 8, -1, //
9, 10, 11, -1 //
);
/* Merge the first 8 registers (6 bytes) normally
* as the AVX2 algorithm needs 4 padding bytes at the start */
uint8_t val;
for (int i = 0; i < 8; i++) {
HLL_DENSE_GET_REGISTER(val, reg_dense, i);
if (val > reg_raw[i]) {
reg_raw[i] = val;
}
}
/* Dense to Raw:
*
* 4 registers in 3 bytes:
* {bbaaaaaa|ccccbbbb|ddddddcc}
*
* LOAD 32 bytes (32 registers) per iteration:
* 4(padding) + 12(16 registers) + 12(16 registers) + 4(padding)
* {XXXX|AAAB|BBCC|CDDD|EEEF|FFGG|GHHH|XXXX}
*
* SHUFFLE to:
* {AAA0|BBB0|CCC0|DDD0|EEE0|FFF0|GGG0|HHH0}
* {bbaaaaaa|ccccbbbb|ddddddcc|00000000} x8
*
* AVX2 is little endian, each of the 8 groups is a little-endian int32.
* A group (int32) contains 3 valid bytes (4 registers) and a zero byte.
*
* extract registers in each group with AND and SHIFT:
* {00aaaaaa|00000000|00000000|00000000} x8 (<<0)
* {00000000|00bbbbbb|00000000|00000000} x8 (<<2)
* {00000000|00000000|00cccccc|00000000} x8 (<<4)
* {00000000|00000000|00000000|00dddddd} x8 (<<6)
*
* merge the extracted registers with OR:
* {00aaaaaa|00bbbbbb|00cccccc|00dddddd} x8
*
* Finally, compute MAX(reg_raw, merged) and STORE it back to reg_raw
*/
/* Skip 8 registers (6 bytes) */
const uint8_t *r = reg_dense + 6 - 4;
uint8_t *t = reg_raw + 8;
for (int i = 0; i < HLL_REGISTERS / 32 - 1; ++i) {
__m256i x0, x;
x0 = _mm256_loadu_si256((__m256i *)r);
x = _mm256_shuffle_epi8(x0, shuffle);
__m256i a1, a2, a3, a4;
a1 = _mm256_and_si256(x, _mm256_set1_epi32(0x0000003f));
a2 = _mm256_and_si256(x, _mm256_set1_epi32(0x00000fc0));
a3 = _mm256_and_si256(x, _mm256_set1_epi32(0x0003f000));
a4 = _mm256_and_si256(x, _mm256_set1_epi32(0x00fc0000));
a2 = _mm256_slli_epi32(a2, 2);
a3 = _mm256_slli_epi32(a3, 4);
a4 = _mm256_slli_epi32(a4, 6);
__m256i y1, y2, y;
y1 = _mm256_or_si256(a1, a2);
y2 = _mm256_or_si256(a3, a4);
y = _mm256_or_si256(y1, y2);
__m256i z = _mm256_loadu_si256((__m256i *)t);
z = _mm256_max_epu8(z, y);
_mm256_storeu_si256((__m256i *)t, z);
r += 24;
t += 32;
}
/* Merge the last 24 registers normally
* as the AVX2 algorithm needs 4 padding bytes at the end */
for (int i = HLL_REGISTERS - 24; i < HLL_REGISTERS; i++) {
HLL_DENSE_GET_REGISTER(val, reg_dense, i);
if (val > reg_raw[i]) {
reg_raw[i] = val;
}
}
}
#endif
/* Merge dense-encoded registers to raw registers array. */
void hllMergeDense(uint8_t* reg_raw, const uint8_t* reg_dense) {
#ifdef HAVE_AVX2
if (HLL_REGISTERS == 16384 && HLL_BITS == 6) {
if (HLL_USE_AVX2) {
hllMergeDenseAVX2(reg_raw, reg_dense);
return;
}
}
#endif
uint8_t val;
for (int i = 0; i < HLL_REGISTERS; i++) {
HLL_DENSE_GET_REGISTER(val, reg_dense, i);
if (val > reg_raw[i]) {
reg_raw[i] = val;
}
}
}
/* Merge by computing MAX(registers[i],hll[i]) the HyperLogLog 'hll' /* Merge by computing MAX(registers[i],hll[i]) the HyperLogLog 'hll'
* with an array of uint8_t HLL_REGISTERS registers pointed by 'max'. * with an array of uint8_t HLL_REGISTERS registers pointed by 'max'.
* *
@ -1054,12 +1199,7 @@ int hllMerge(uint8_t *max, robj *hll) {
int i; int i;
if (hdr->encoding == HLL_DENSE) { if (hdr->encoding == HLL_DENSE) {
uint8_t val; hllMergeDense(max, hdr->registers);
for (i = 0; i < HLL_REGISTERS; i++) {
HLL_DENSE_GET_REGISTER(val,hdr->registers,i);
if (val > max[i]) max[i] = val;
}
} else { } else {
uint8_t *p = hll->ptr, *end = p + sdslen(hll->ptr); uint8_t *p = hll->ptr, *end = p + sdslen(hll->ptr);
long runlen, regval; long runlen, regval;
@ -1091,6 +1231,117 @@ int hllMerge(uint8_t *max, robj *hll) {
return C_OK; return C_OK;
} }
#ifdef HAVE_AVX2
/* A specialized version of hllDenseCompress, optimized for default configurations.
*
* Requirements:
* 1) HLL_REGISTERS == 16384 && HLL_BITS == 6
* 2) The CPU supports AVX2 (checked at runtime in hllDenseCompress)
*
* reg_dense: pointer to the dense representation array (12288 bytes, 6 bits per register)
* reg_raw: pointer to the raw representation array (16384 bytes, one byte per register)
*/
ATTRIBUTE_TARGET_AVX2
void hllDenseCompressAVX2(uint8_t *reg_dense, const uint8_t *reg_raw) {
const __m256i shuffle = _mm256_setr_epi8( //
0, 1, 2, //
4, 5, 6, //
8, 9, 10, //
12, 13, 14, //
-1, -1, -1, -1, //
0, 1, 2, //
4, 5, 6, //
8, 9, 10, //
12, 13, 14, //
-1, -1, -1, -1 //
);
/* Raw to Dense:
*
* LOAD 32 bytes (32 registers) per iteration:
* {00aaaaaa|00bbbbbb|00cccccc|00dddddd} x8
*
* AVX2 is little endian, each of the 8 groups is a little-endian int32.
* A group (int32) contains 4 registers.
*
* move the registers to correct positions with AND and SHIFT:
* {00aaaaaa|00000000|00000000|00000000} x8 (>>0)
* {bb000000|0000bbbb|00000000|00000000} x8 (>>2)
* {00000000|cccc0000|000000cc|00000000} x8 (>>4)
* {00000000|00000000|dddddd00|00000000} x8 (>>6)
*
* merge the registers with OR:
* {bbaaaaaa|ccccbbbb|ddddddcc|00000000} x8
* {AAA0|BBB0|CCC0|DDD0|EEE0|FFF0|GGG0|HHH0}
*
* SHUFFLE to:
* {AAAB|BBCC|CDDD|0000|EEEF|FFGG|GHHH|0000}
*
* STORE the lower half and higher half respectively:
* AAABBBCCCDDD0000
* EEEFFFGGGHHH0000
* AAABBBCCCDDDEEEFFFGGGHHH0000
*
* Note that the last 4 bytes are padding bytes.
*/
const uint8_t *r = reg_raw;
uint8_t *t = reg_dense;
for (int i = 0; i < HLL_REGISTERS / 32 - 1; ++i) {
__m256i x = _mm256_loadu_si256((__m256i *)r);
__m256i a1, a2, a3, a4;
a1 = _mm256_and_si256(x, _mm256_set1_epi32(0x0000003f));
a2 = _mm256_and_si256(x, _mm256_set1_epi32(0x00003f00));
a3 = _mm256_and_si256(x, _mm256_set1_epi32(0x003f0000));
a4 = _mm256_and_si256(x, _mm256_set1_epi32(0x3f000000));
a2 = _mm256_srli_epi32(a2, 2);
a3 = _mm256_srli_epi32(a3, 4);
a4 = _mm256_srli_epi32(a4, 6);
__m256i y1, y2, y;
y1 = _mm256_or_si256(a1, a2);
y2 = _mm256_or_si256(a3, a4);
y = _mm256_or_si256(y1, y2);
y = _mm256_shuffle_epi8(y, shuffle);
__m128i lower, higher;
lower = _mm256_castsi256_si128(y);
higher = _mm256_extracti128_si256(y, 1);
_mm_storeu_si128((__m128i *)t, lower);
_mm_storeu_si128((__m128i *)(t + 12), higher);
r += 32;
t += 24;
}
/* Merge the last 32 registers normally
* as the AVX2 algorithm needs 4 padding bytes at the end */
for (int i = HLL_REGISTERS - 32; i < HLL_REGISTERS; i++) {
HLL_DENSE_SET_REGISTER(reg_dense, i, reg_raw[i]);
}
}
#endif
/* Compress raw registers to dense representation. */
void hllDenseCompress(uint8_t *reg_dense, const uint8_t *reg_raw) {
#ifdef HAVE_AVX2
if (HLL_REGISTERS == 16384 && HLL_BITS == 6) {
if (HLL_USE_AVX2) {
hllDenseCompressAVX2(reg_dense, reg_raw);
return;
}
}
#endif
for (int i = 0; i < HLL_REGISTERS; i++) {
HLL_DENSE_SET_REGISTER(reg_dense, i, reg_raw[i]);
}
}
/* ========================== HyperLogLog commands ========================== */ /* ========================== HyperLogLog commands ========================== */
/* Create an HLL object. We always create the HLL using sparse encoding. /* Create an HLL object. We always create the HLL using sparse encoding.
@ -1350,12 +1601,17 @@ void pfmergeCommand(client *c) {
/* Write the resulting HLL to the destination HLL registers and /* Write the resulting HLL to the destination HLL registers and
* invalidate the cached value. */ * invalidate the cached value. */
for (j = 0; j < HLL_REGISTERS; j++) { if (use_dense) {
if (max[j] == 0) continue;
hdr = o->ptr; hdr = o->ptr;
switch(hdr->encoding) { hllDenseCompress(hdr->registers, max);
case HLL_DENSE: hllDenseSet(hdr->registers,j,max[j]); break; } else {
case HLL_SPARSE: hllSparseSet(o,j,max[j]); break; for (j = 0; j < HLL_REGISTERS; j++) {
if (max[j] == 0) continue;
hdr = o->ptr;
switch (hdr->encoding) {
case HLL_DENSE: hllDenseSet(hdr->registers,j,max[j]); break;
case HLL_SPARSE: hllSparseSet(o,j,max[j]); break;
}
} }
} }
hdr = o->ptr; /* o->ptr may be different now, as a side effect of hdr = o->ptr; /* o->ptr may be different now, as a side effect of
@ -1484,6 +1740,7 @@ cleanup:
* PFDEBUG DECODE <key> * PFDEBUG DECODE <key>
* PFDEBUG ENCODING <key> * PFDEBUG ENCODING <key>
* PFDEBUG TODENSE <key> * PFDEBUG TODENSE <key>
* PFDEBUG SIMD (ON|OFF)
*/ */
void pfdebugCommand(client *c) { void pfdebugCommand(client *c) {
char *cmd = c->argv[1]->ptr; char *cmd = c->argv[1]->ptr;
@ -1491,6 +1748,26 @@ void pfdebugCommand(client *c) {
robj *o; robj *o;
int j; int j;
if (!strcasecmp(cmd, "simd")) {
if (c->argc != 3) goto arityerr;
if (!strcasecmp(c->argv[2]->ptr, "on")) {
#ifdef HAVE_AVX2
simd_enabled = 1;
#endif
} else if (!strcasecmp(c->argv[2]->ptr, "off")) {
#ifdef HAVE_AVX2
simd_enabled = 0;
#endif
} else {
addReplyError(c, "Argument must be ON or OFF");
}
addReplyStatus(c, HLL_USE_AVX2 ? "enabled" : "disabled");
return;
}
o = lookupKeyWrite(c->db,c->argv[2]); o = lookupKeyWrite(c->db,c->argv[2]);
if (o == NULL) { if (o == NULL) {
addReplyError(c,"The specified key does not exist"); addReplyError(c,"The specified key does not exist");

631
src/iothread.c Normal file
View File

@ -0,0 +1,631 @@
/* iothread.c -- The threaded io implementation.
*
* Copyright (c) 2024-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
*/
#include "server.h"
/* IO threads. */
static IOThread IOThreads[IO_THREADS_MAX_NUM];
/* For main thread */
static list *mainThreadPendingClientsToIOThreads[IO_THREADS_MAX_NUM]; /* Clients to IO threads */
static list *mainThreadProcessingClients[IO_THREADS_MAX_NUM]; /* Clients in processing */
static list *mainThreadPendingClients[IO_THREADS_MAX_NUM]; /* Pending clients from IO threads */
static pthread_mutex_t mainThreadPendingClientsMutexes[IO_THREADS_MAX_NUM]; /* Mutex for pending clients */
static eventNotifier* mainThreadPendingClientsNotifiers[IO_THREADS_MAX_NUM]; /* Notifier for pending clients */
/* When IO threads read a complete query of clients or want to free clients, it
* should remove it from its clients list and put the client in the list to main
* thread, we will send these clients to main thread in IOThreadBeforeSleep. */
void enqueuePendingClientsToMainThread(client *c, int unbind) {
/* If the IO thread may no longer manage it, such as closing client, we should
* unbind client from event loop, so main thread doesn't need to do it costly. */
if (unbind) connUnbindEventLoop(c->conn);
/* Just skip if it already is transferred. */
if (c->io_thread_client_list_node) {
listDelNode(IOThreads[c->tid].clients, c->io_thread_client_list_node);
c->io_thread_client_list_node = NULL;
/* Disable read and write to avoid race when main thread processes. */
c->io_flags &= ~(CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED);
listAddNodeTail(IOThreads[c->tid].pending_clients_to_main_thread, c);
}
}
/* Unbind connection of client from io thread event loop, write and read handlers
* also be removed, ensures that we can operate the client safely. */
void unbindClientFromIOThreadEventLoop(client *c) {
serverAssert(c->tid != IOTHREAD_MAIN_THREAD_ID &&
c->running_tid == IOTHREAD_MAIN_THREAD_ID);
if (!connHasEventLoop(c->conn)) return;
/* As calling in main thread, we should pause the io thread to make it safe. */
pauseIOThread(c->tid);
connUnbindEventLoop(c->conn);
resumeIOThread(c->tid);
}
/* When main thread is processing a client from IO thread, and wants to keep it,
* we should unbind connection of client from io thread event loop first,
* and then bind the client connection into server's event loop. */
void keepClientInMainThread(client *c) {
serverAssert(c->tid != IOTHREAD_MAIN_THREAD_ID &&
c->running_tid == IOTHREAD_MAIN_THREAD_ID);
/* IO thread no longer manage it. */
server.io_threads_clients_num[c->tid]--;
/* Unbind connection of client from io thread event loop. */
unbindClientFromIOThreadEventLoop(c);
/* Let main thread to run it, rebind event loop and read handler */
connRebindEventLoop(c->conn, server.el);
connSetReadHandler(c->conn, readQueryFromClient);
c->io_flags |= CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED;
c->running_tid = IOTHREAD_MAIN_THREAD_ID;
c->tid = IOTHREAD_MAIN_THREAD_ID;
/* Main thread starts to manage it. */
server.io_threads_clients_num[c->tid]++;
}
/* If the client is managed by IO thread, we should fetch it from IO thread
* and then main thread will can process it. Just like IO Thread transfers
* the client to the main thread for processing. */
void fetchClientFromIOThread(client *c) {
serverAssert(c->tid != IOTHREAD_MAIN_THREAD_ID &&
c->running_tid != IOTHREAD_MAIN_THREAD_ID);
pauseIOThread(c->tid);
/* Remove the client from clients list of IO thread or main thread. */
if (c->io_thread_client_list_node) {
listDelNode(IOThreads[c->tid].clients, c->io_thread_client_list_node);
c->io_thread_client_list_node = NULL;
} else {
list *clients[5] = {
IOThreads[c->tid].pending_clients,
IOThreads[c->tid].pending_clients_to_main_thread,
mainThreadPendingClients[c->tid],
mainThreadProcessingClients[c->tid],
mainThreadPendingClientsToIOThreads[c->tid]
};
for (int i = 0; i < 5; i++) {
listNode *ln = listSearchKey(clients[i], c);
if (ln) {
listDelNode(clients[i], ln);
/* Client only can be in one client list. */
break;
}
}
}
/* Unbind connection of client from io thread event loop. */
connUnbindEventLoop(c->conn);
/* Now main thread can process it. */
c->running_tid = IOTHREAD_MAIN_THREAD_ID;
resumeIOThread(c->tid);
}
/* For some clients, we must handle them in the main thread, since there is
* data race to be processed in IO threads.
*
* - Close ASAP, we must free the client in main thread.
* - Replica, pubsub, monitor, blocked, tracking clients, main thread may
* directly write them a reply when conditions are met.
* - Script command with debug may operate connection directly. */
int isClientMustHandledByMainThread(client *c) {
if (c->flags & (CLIENT_CLOSE_ASAP | CLIENT_MASTER | CLIENT_SLAVE |
CLIENT_PUBSUB | CLIENT_MONITOR | CLIENT_BLOCKED |
CLIENT_UNBLOCKED | CLIENT_TRACKING | CLIENT_LUA_DEBUG |
CLIENT_LUA_DEBUG_SYNC))
{
return 1;
}
return 0;
}
/* When the main thread accepts a new client or transfers clients to IO threads,
* it assigns the client to the IO thread with the fewest clients. */
void assignClientToIOThread(client *c) {
serverAssert(c->tid == IOTHREAD_MAIN_THREAD_ID);
/* Find the IO thread with the fewest clients. */
int min_id = 0;
int min = INT_MAX;
for (int i = 1; i < server.io_threads_num; i++) {
if (server.io_threads_clients_num[i] < min) {
min = server.io_threads_clients_num[i];
min_id = i;
}
}
/* Assign the client to the IO thread. */
server.io_threads_clients_num[c->tid]--;
c->tid = min_id;
c->running_tid = min_id;
server.io_threads_clients_num[min_id]++;
/* Unbind connection of client from main thread event loop, disable read and
* write, and then put it in the list, main thread will send these clients
* to IO thread in beforeSleep. */
connUnbindEventLoop(c->conn);
c->io_flags &= ~(CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED);
listAddNodeTail(mainThreadPendingClientsToIOThreads[c->tid], c);
}
/* If updating maxclients config, we not only resize the event loop of main thread
* but also resize the event loop of all io threads, and if one thread is failed,
* it is failed totally, since a fd can be distributed into any IO thread. */
int resizeAllIOThreadsEventLoops(size_t newsize) {
int result = AE_OK;
if (server.io_threads_num <= 1) return result;
/* To make context safe. */
pauseAllIOThreads();
for (int i = 1; i < server.io_threads_num; i++) {
IOThread *t = &IOThreads[i];
if (aeResizeSetSize(t->el, newsize) == AE_ERR)
result = AE_ERR;
}
resumeAllIOThreads();
return result;
}
/* In the main thread, we may want to operate data of io threads, maybe uninstall
* event handler, access query/output buffer or resize event loop, we need a clean
* and safe context to do that. We pause io thread in IOThreadBeforeSleep, do some
* jobs and then resume it. To avoid thread suspended, we use busy waiting to confirm
* the target status. Besides we use atomic variable to make sure memory visibility
* and ordering.
*
* Make sure that only the main thread can call these function,
* - pauseIOThread, resumeIOThread
* - pauseAllIOThreads, resumeAllIOThreads
* - pauseIOThreadsRange, resumeIOThreadsRange
*
* The main thread will pause the io thread, and then wait for the io thread to
* be paused. The io thread will check the paused status in IOThreadBeforeSleep,
* and then pause itself.
*
* The main thread will resume the io thread, and then wait for the io thread to
* be resumed. The io thread will check the paused status in IOThreadBeforeSleep,
* and then resume itself.
*/
/* We may pause the same io thread nestedly, so we need to record the times of
* pausing, and only when the times of pausing is 0, we can pause the io thread,
* and only when the times of pausing is 1, we can resume the io thread. */
static int PausedIOThreads[IO_THREADS_MAX_NUM] = {0};
/* Pause the specific range of io threads, and wait for them to be paused. */
void pauseIOThreadsRange(int start, int end) {
if (!server.io_threads_active) return;
serverAssert(start >= 1 && end < server.io_threads_num && start <= end);
serverAssert(pthread_equal(pthread_self(), server.main_thread_id));
/* Try to make all io threads paused in parallel */
for (int i = start; i <= end; i++) {
PausedIOThreads[i]++;
/* Skip if already paused */
if (PausedIOThreads[i] > 1) continue;
int paused;
atomicGetWithSync(IOThreads[i].paused, paused);
/* Don't support to call reentrant */
serverAssert(paused == IO_THREAD_UNPAUSED);
atomicSetWithSync(IOThreads[i].paused, IO_THREAD_PAUSING);
/* Just notify io thread, no actual job, since io threads check paused
* status in IOThreadBeforeSleep, so just wake it up if polling wait. */
triggerEventNotifier(IOThreads[i].pending_clients_notifier);
}
/* Wait for all io threads paused */
for (int i = start; i <= end; i++) {
if (PausedIOThreads[i] > 1) continue;
int paused = IO_THREAD_PAUSING;
while (paused != IO_THREAD_PAUSED) {
atomicGetWithSync(IOThreads[i].paused, paused);
}
}
}
/* Resume the specific range of io threads, and wait for them to be resumed. */
void resumeIOThreadsRange(int start, int end) {
if (!server.io_threads_active) return;
serverAssert(start >= 1 && end < server.io_threads_num && start <= end);
serverAssert(pthread_equal(pthread_self(), server.main_thread_id));
for (int i = start; i <= end; i++) {
serverAssert(PausedIOThreads[i] > 0);
PausedIOThreads[i]--;
if (PausedIOThreads[i] > 0) continue;
int paused;
/* Check if it is paused, since we must call 'pause' and
* 'resume' in pairs */
atomicGetWithSync(IOThreads[i].paused, paused);
serverAssert(paused == IO_THREAD_PAUSED);
/* Resume */
atomicSetWithSync(IOThreads[i].paused, IO_THREAD_RESUMING);
while (paused != IO_THREAD_UNPAUSED) {
atomicGetWithSync(IOThreads[i].paused, paused);
}
}
}
/* The IO thread checks whether it is being paused, and if so, it pauses itself
* and waits for resuming, corresponding to the pause/resumeIOThread* functions.
* Currently, this is only called in IOThreadBeforeSleep, as there are no pending
* I/O events at this point, with a clean context. */
void handlePauseAndResume(IOThread *t) {
int paused;
/* Check if i am being paused. */
atomicGetWithSync(t->paused, paused);
if (paused == IO_THREAD_PAUSING) {
atomicSetWithSync(t->paused, IO_THREAD_PAUSED);
/* Wait for resuming */
while (paused != IO_THREAD_RESUMING) {
atomicGetWithSync(t->paused, paused);
}
atomicSetWithSync(t->paused, IO_THREAD_UNPAUSED);
}
}
/* Pause the specific io thread, and wait for it to be paused. */
void pauseIOThread(int id) {
pauseIOThreadsRange(id, id);
}
/* Resume the specific io thread, and wait for it to be resumed. */
void resumeIOThread(int id) {
resumeIOThreadsRange(id, id);
}
/* Pause all io threads, and wait for them to be paused. */
void pauseAllIOThreads(void) {
pauseIOThreadsRange(1, server.io_threads_num-1);
}
/* Resume all io threads, and wait for them to be resumed. */
void resumeAllIOThreads(void) {
resumeIOThreadsRange(1, server.io_threads_num-1);
}
/* Add the pending clients to the list of IO threads, and trigger an event to
* notify io threads to handle. */
int sendPendingClientsToIOThreads(void) {
int processed = 0;
for (int i = 1; i < server.io_threads_num; i++) {
int len = listLength(mainThreadPendingClientsToIOThreads[i]);
if (len > 0) {
IOThread *t = &IOThreads[i];
pthread_mutex_lock(&t->pending_clients_mutex);
listJoin(t->pending_clients, mainThreadPendingClientsToIOThreads[i]);
pthread_mutex_unlock(&t->pending_clients_mutex);
/* Trigger an event, maybe an error is returned when buffer is full
* if using pipe, but no worry, io thread will handle all clients
* in list when receiving a notification. */
triggerEventNotifier(t->pending_clients_notifier);
}
processed += len;
}
return processed;
}
extern int ProcessingEventsWhileBlocked;
/* The main thread processes the clients from IO threads, these clients may have
* a complete command to execute or need to be freed. Note that IO threads never
* free client since this operation access much server data.
*
* Please notice that this function may be called reentrantly, i,e, the same goes
* for handleClientsFromIOThread and processClientsOfAllIOThreads. For example,
* when processing script command, it may call processEventsWhileBlocked to
* process new events, if the clients with fired events from the same io thread,
* it may call this function reentrantly. */
void processClientsFromIOThread(IOThread *t) {
listNode *node = NULL;
while (listLength(mainThreadProcessingClients[t->id])) {
/* Each time we pop up only the first client to process to guarantee
* reentrancy safety. */
if (node) zfree(node);
node = listFirst(mainThreadProcessingClients[t->id]);
listUnlinkNode(mainThreadProcessingClients[t->id], node);
client *c = listNodeValue(node);
/* Make sure the client is readable or writable in io thread to
* avoid data race. */
serverAssert(!(c->io_flags & (CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED)));
serverAssert(!(c->flags & CLIENT_CLOSE_ASAP));
/* Let main thread to run it, set running thread id first. */
c->running_tid = IOTHREAD_MAIN_THREAD_ID;
/* If a read error occurs, handle it in the main thread first, since we
* want to print logs about client information before freeing. */
if (c->read_error) handleClientReadError(c);
/* The client is asked to close in IO thread. */
if (c->io_flags & CLIENT_IO_CLOSE_ASAP) {
freeClient(c);
continue;
}
/* Update the client in the mem usage */
updateClientMemUsageAndBucket(c);
/* Process the pending command and input buffer. */
if (!c->read_error && c->io_flags & CLIENT_IO_PENDING_COMMAND) {
c->flags |= CLIENT_PENDING_COMMAND;
if (processPendingCommandAndInputBuffer(c) == C_ERR) {
/* If the client is no longer valid, it must be freed safely. */
continue;
}
}
/* We may have pending replies if io thread may not finish writing
* reply to client, so we did not put the client in pending write
* queue. And we should do that first since we may keep the client
* in main thread instead of returning to io threads. */
if (!(c->flags & CLIENT_PENDING_WRITE) && clientHasPendingReplies(c))
putClientInPendingWriteQueue(c);
/* The client only can be processed in the main thread, otherwise data
* race will happen, since we may touch client's data in main thread. */
if (isClientMustHandledByMainThread(c)) {
keepClientInMainThread(c);
continue;
}
/* Remove this client from pending write clients queue of main thread,
* And some clients may do not have reply if CLIENT REPLY OFF/SKIP. */
if (c->flags & CLIENT_PENDING_WRITE) {
c->flags &= ~CLIENT_PENDING_WRITE;
listUnlinkNode(server.clients_pending_write, &c->clients_pending_write_node);
}
c->running_tid = c->tid;
listLinkNodeHead(mainThreadPendingClientsToIOThreads[c->tid], node);
node = NULL;
}
if (node) zfree(node);
/* Trigger the io thread to handle these clients ASAP to make them processed
* in parallel.
*
* If AOF fsync policy is always, we should not let io thread handle these
* clients now since we don't flush AOF buffer to file and sync yet.
* So these clients will be delayed to send io threads in beforeSleep after
* flushAppendOnlyFile.
*
* If we are in processEventsWhileBlocked, we don't send clients to io threads
* now, we want to update server.events_processed_while_blocked accurately. */
if (listLength(mainThreadPendingClientsToIOThreads[t->id]) &&
server.aof_fsync != AOF_FSYNC_ALWAYS &&
!ProcessingEventsWhileBlocked)
{
pthread_mutex_lock(&(t->pending_clients_mutex));
listJoin(t->pending_clients, mainThreadPendingClientsToIOThreads[t->id]);
pthread_mutex_unlock(&(t->pending_clients_mutex));
triggerEventNotifier(t->pending_clients_notifier);
}
}
/* When the io thread finishes processing the client with the read event, it will
* notify the main thread through event triggering in IOThreadBeforeSleep. The main
* thread handles the event through this function. */
void handleClientsFromIOThread(struct aeEventLoop *el, int fd, void *ptr, int mask) {
UNUSED(el);
UNUSED(mask);
IOThread *t = ptr;
/* Handle fd event first. */
serverAssert(fd == getReadEventFd(mainThreadPendingClientsNotifiers[t->id]));
handleEventNotifier(mainThreadPendingClientsNotifiers[t->id]);
/* Get the list of clients to process. */
pthread_mutex_lock(&mainThreadPendingClientsMutexes[t->id]);
listJoin(mainThreadProcessingClients[t->id], mainThreadPendingClients[t->id]);
pthread_mutex_unlock(&mainThreadPendingClientsMutexes[t->id]);
if (listLength(mainThreadProcessingClients[t->id]) == 0) return;
/* Process the clients from IO threads. */
processClientsFromIOThread(t);
}
/* In the new threaded io design, one thread may process multiple clients, so when
* an io thread notifies the main thread of an event, there may be multiple clients
* with commands that need to be processed. But in the event handler function
* handleClientsFromIOThread may be blocked when processing the specific command,
* the previous clients can not get a reply, and the subsequent clients can not be
* processed, so we need to handle this scenario in beforeSleep. The function is to
* process the commands of subsequent clients from io threads. And another function
* sendPendingClientsToIOThreads make sure clients from io thread can get replies.
* See also beforeSleep. */
void processClientsOfAllIOThreads(void) {
for (int i = 1; i < server.io_threads_num; i++) {
processClientsFromIOThread(&IOThreads[i]);
}
}
/* After the main thread processes the clients, it will send the clients back to
* io threads to handle, and fire an event, the io thread handles the event by
* this function. If the client is not binded to the event loop, we should bind
* it first and install read handler, and we don't uninstall client read handler
* unless freeing client. If the client has pending reply, we just reply to client
* first, and then install write handler if needed. */
void handleClientsFromMainThread(struct aeEventLoop *ae, int fd, void *ptr, int mask) {
UNUSED(ae);
UNUSED(mask);
IOThread *t = ptr;
/* Handle fd event first. */
serverAssert(fd == getReadEventFd(t->pending_clients_notifier));
handleEventNotifier(t->pending_clients_notifier);
pthread_mutex_lock(&t->pending_clients_mutex);
listJoin(t->processing_clients, t->pending_clients);
pthread_mutex_unlock(&t->pending_clients_mutex);
if (listLength(t->processing_clients) == 0) return;
listIter li;
listNode *ln;
listRewind(t->processing_clients, &li);
while((ln = listNext(&li))) {
client *c = listNodeValue(ln);
serverAssert(!(c->io_flags & (CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED)));
/* Main thread must handle clients with CLIENT_CLOSE_ASAP flag, since
* we only set io_flags when clients in io thread are freed ASAP. */
serverAssert(!(c->flags & CLIENT_CLOSE_ASAP));
/* Link client in IO thread clients list first. */
serverAssert(c->io_thread_client_list_node == NULL);
listAddNodeTail(t->clients, c);
c->io_thread_client_list_node = listLast(t->clients);
/* The client is asked to close, we just let main thread free it. */
if (c->io_flags & CLIENT_IO_CLOSE_ASAP) {
enqueuePendingClientsToMainThread(c, 1);
continue;
}
/* Enable read and write and reset some flags. */
c->io_flags |= CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED;
c->io_flags &= ~CLIENT_IO_PENDING_COMMAND;
/* Only bind once, we never remove read handler unless freeing client. */
if (!connHasEventLoop(c->conn)) {
connRebindEventLoop(c->conn, t->el);
serverAssert(!connHasReadHandler(c->conn));
connSetReadHandler(c->conn, readQueryFromClient);
}
/* If the client has pending replies, write replies to client. */
if (clientHasPendingReplies(c)) {
writeToClient(c, 0);
if (!(c->io_flags & CLIENT_IO_CLOSE_ASAP) && clientHasPendingReplies(c)) {
connSetWriteHandler(c->conn, sendReplyToClient);
}
}
}
listEmpty(t->processing_clients);
}
void IOThreadBeforeSleep(struct aeEventLoop *el) {
IOThread *t = el->privdata[0];
/* Handle pending data(typical TLS). */
connTypeProcessPendingData(el);
/* If any connection type(typical TLS) still has pending unread data don't sleep at all. */
aeSetDontWait(el, connTypeHasPendingData(el));
/* Check if i am being paused, pause myself and resume. */
handlePauseAndResume(t);
/* Check if there are clients to be processed in main thread, and then join
* them to the list of main thread. */
if (listLength(t->pending_clients_to_main_thread) > 0) {
pthread_mutex_lock(&mainThreadPendingClientsMutexes[t->id]);
listJoin(mainThreadPendingClients[t->id], t->pending_clients_to_main_thread);
pthread_mutex_unlock(&mainThreadPendingClientsMutexes[t->id]);
/* Trigger an event, maybe an error is returned when buffer is full
* if using pipe, but no worry, main thread will handle all clients
* in list when receiving a notification. */
triggerEventNotifier(mainThreadPendingClientsNotifiers[t->id]);
}
}
/* The main function of IO thread, it will run an event loop. The mian thread
* and IO thread will communicate through event notifier. */
void *IOThreadMain(void *ptr) {
IOThread *t = ptr;
char thdname[16];
snprintf(thdname, sizeof(thdname), "io_thd_%d", t->id);
redis_set_thread_title(thdname);
redisSetCpuAffinity(server.server_cpulist);
makeThreadKillable();
aeSetBeforeSleepProc(t->el, IOThreadBeforeSleep);
aeMain(t->el);
return NULL;
}
/* Initialize the data structures needed for threaded I/O. */
void initThreadedIO(void) {
if (server.io_threads_num <= 1) return;
server.io_threads_active = 1;
if (server.io_threads_num > IO_THREADS_MAX_NUM) {
serverLog(LL_WARNING,"Fatal: too many I/O threads configured. "
"The maximum number is %d.", IO_THREADS_MAX_NUM);
exit(1);
}
/* Spawn and initialize the I/O threads. */
for (int i = 1; i < server.io_threads_num; i++) {
IOThread *t = &IOThreads[i];
t->id = i;
t->el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR);
t->el->privdata[0] = t;
t->pending_clients = listCreate();
t->processing_clients = listCreate();
t->pending_clients_to_main_thread = listCreate();
t->clients = listCreate();
atomicSetWithSync(t->paused, IO_THREAD_UNPAUSED);
pthread_mutexattr_t *attr = NULL;
#if defined(__linux__) && defined(__GLIBC__)
attr = zmalloc(sizeof(pthread_mutexattr_t));
pthread_mutexattr_init(attr);
pthread_mutexattr_settype(attr, PTHREAD_MUTEX_ADAPTIVE_NP);
#endif
pthread_mutex_init(&t->pending_clients_mutex, attr);
t->pending_clients_notifier = createEventNotifier();
if (aeCreateFileEvent(t->el, getReadEventFd(t->pending_clients_notifier),
AE_READABLE, handleClientsFromMainThread, t) != AE_OK)
{
serverLog(LL_WARNING, "Fatal: Can't register file event for IO thread notifications.");
exit(1);
}
/* Create IO thread */
if (pthread_create(&t->tid, NULL, IOThreadMain, (void*)t) != 0) {
serverLog(LL_WARNING, "Fatal: Can't initialize IO thread.");
exit(1);
}
/* For main thread */
mainThreadPendingClientsToIOThreads[i] = listCreate();
mainThreadPendingClients[i] = listCreate();
mainThreadProcessingClients[i] = listCreate();
pthread_mutex_init(&mainThreadPendingClientsMutexes[i], attr);
mainThreadPendingClientsNotifiers[i] = createEventNotifier();
if (aeCreateFileEvent(server.el, getReadEventFd(mainThreadPendingClientsNotifiers[i]),
AE_READABLE, handleClientsFromIOThread, t) != AE_OK)
{
serverLog(LL_WARNING, "Fatal: Can't register file event for main thread notifications.");
exit(1);
}
if (attr) zfree(attr);
}
}
/* Kill the IO threads, TODO: release the applied resources. */
void killIOThreads(void) {
if (server.io_threads_num <= 1) return;
int err, j;
for (j = 1; j < server.io_threads_num; j++) {
if (IOThreads[j].tid == pthread_self()) continue;
if (IOThreads[j].tid && pthread_cancel(IOThreads[j].tid) == 0) {
if ((err = pthread_join(IOThreads[j].tid,NULL)) != 0) {
serverLog(LL_WARNING,
"IO thread(tid:%lu) can not be joined: %s",
(unsigned long)IOThreads[j].tid, strerror(err));
} else {
serverLog(LL_WARNING,
"IO thread(tid:%lu) terminated",(unsigned long)IOThreads[j].tid);
}
}
}
}

View File

@ -42,6 +42,7 @@ struct _kvstore {
unsigned long long *dict_size_index; /* Binary indexed tree (BIT) that describes cumulative key frequencies up until given dict-index. */ unsigned long long *dict_size_index; /* Binary indexed tree (BIT) that describes cumulative key frequencies up until given dict-index. */
size_t overhead_hashtable_lut; /* The overhead of all dictionaries. */ size_t overhead_hashtable_lut; /* The overhead of all dictionaries. */
size_t overhead_hashtable_rehashing; /* The overhead of dictionaries rehashing. */ size_t overhead_hashtable_rehashing; /* The overhead of dictionaries rehashing. */
void *metadata[]; /* conditionally allocated based on "flags" */
}; };
/* Structure for kvstore iterator that allows iterating across multiple dicts. */ /* Structure for kvstore iterator that allows iterating across multiple dicts. */
@ -59,10 +60,17 @@ struct _kvstoreDictIterator {
dictIterator di; dictIterator di;
}; };
/* Dict metadata for database, used for record the position in rehashing list. */ /* Basic metadata allocated per dict */
typedef struct { typedef struct {
listNode *rehashing_node; /* list node in rehashing list */ listNode *rehashing_node; /* list node in rehashing list */
} kvstoreDictMetadata; } kvstoreDictMetaBase;
/* Conditionally metadata allocated per dict (specifically for keysizes histogram) */
typedef struct {
kvstoreDictMetaBase base; /* must be first in struct ! */
/* External metadata */
kvstoreDictMetadata meta;
} kvstoreDictMetaEx;
/**********************************/ /**********************************/
/*** Helpers **********************/ /*** Helpers **********************/
@ -184,7 +192,7 @@ static void freeDictIfNeeded(kvstore *kvs, int didx) {
* If there's one dict, bucket count can be retrieved directly from single dict bucket. */ * If there's one dict, bucket count can be retrieved directly from single dict bucket. */
static void kvstoreDictRehashingStarted(dict *d) { static void kvstoreDictRehashingStarted(dict *d) {
kvstore *kvs = d->type->userdata; kvstore *kvs = d->type->userdata;
kvstoreDictMetadata *metadata = (kvstoreDictMetadata *)dictMetadata(d); kvstoreDictMetaBase *metadata = (kvstoreDictMetaBase *)dictMetadata(d);
listAddNodeTail(kvs->rehashing, d); listAddNodeTail(kvs->rehashing, d);
metadata->rehashing_node = listLast(kvs->rehashing); metadata->rehashing_node = listLast(kvs->rehashing);
@ -201,7 +209,7 @@ static void kvstoreDictRehashingStarted(dict *d) {
* the old ht size of the dictionary from the total sum of buckets for a DB. */ * the old ht size of the dictionary from the total sum of buckets for a DB. */
static void kvstoreDictRehashingCompleted(dict *d) { static void kvstoreDictRehashingCompleted(dict *d) {
kvstore *kvs = d->type->userdata; kvstore *kvs = d->type->userdata;
kvstoreDictMetadata *metadata = (kvstoreDictMetadata *)dictMetadata(d); kvstoreDictMetaBase *metadata = (kvstoreDictMetaBase *)dictMetadata(d);
if (metadata->rehashing_node) { if (metadata->rehashing_node) {
listDelNode(kvs->rehashing, metadata->rehashing_node); listDelNode(kvs->rehashing, metadata->rehashing_node);
metadata->rehashing_node = NULL; metadata->rehashing_node = NULL;
@ -214,10 +222,15 @@ static void kvstoreDictRehashingCompleted(dict *d) {
kvs->overhead_hashtable_rehashing -= from; kvs->overhead_hashtable_rehashing -= from;
} }
/* Returns the size of the DB dict metadata in bytes. */ /* Returns the size of the DB dict base metadata in bytes. */
static size_t kvstoreDictMetadataSize(dict *d) { static size_t kvstoreDictMetaBaseSize(dict *d) {
UNUSED(d); UNUSED(d);
return sizeof(kvstoreDictMetadata); return sizeof(kvstoreDictMetaBase);
}
/* Returns the size of the DB dict extended metadata in bytes. */
static size_t kvstoreDictMetadataExtendSize(dict *d) {
UNUSED(d);
return sizeof(kvstoreDictMetaEx);
} }
/**********************************/ /**********************************/
@ -232,7 +245,13 @@ kvstore *kvstoreCreate(dictType *type, int num_dicts_bits, int flags) {
* for the dict cursor, see kvstoreScan */ * for the dict cursor, see kvstoreScan */
assert(num_dicts_bits <= 16); assert(num_dicts_bits <= 16);
kvstore *kvs = zcalloc(sizeof(*kvs)); /* Calc kvstore size */
size_t kvsize = sizeof(kvstore);
/* Conditionally calc also histogram size */
if (flags & KVSTORE_ALLOC_META_KEYS_HIST)
kvsize += sizeof(kvstoreMetadata);
kvstore *kvs = zcalloc(kvsize);
memcpy(&kvs->dtype, type, sizeof(kvs->dtype)); memcpy(&kvs->dtype, type, sizeof(kvs->dtype));
kvs->flags = flags; kvs->flags = flags;
@ -243,7 +262,10 @@ kvstore *kvstoreCreate(dictType *type, int num_dicts_bits, int flags) {
assert(!type->rehashingStarted); assert(!type->rehashingStarted);
assert(!type->rehashingCompleted); assert(!type->rehashingCompleted);
kvs->dtype.userdata = kvs; kvs->dtype.userdata = kvs;
kvs->dtype.dictMetadataBytes = kvstoreDictMetadataSize; if (flags & KVSTORE_ALLOC_META_KEYS_HIST)
kvs->dtype.dictMetadataBytes = kvstoreDictMetadataExtendSize;
else
kvs->dtype.dictMetadataBytes = kvstoreDictMetaBaseSize;
kvs->dtype.rehashingStarted = kvstoreDictRehashingStarted; kvs->dtype.rehashingStarted = kvstoreDictRehashingStarted;
kvs->dtype.rehashingCompleted = kvstoreDictRehashingCompleted; kvs->dtype.rehashingCompleted = kvstoreDictRehashingCompleted;
@ -263,7 +285,6 @@ kvstore *kvstoreCreate(dictType *type, int num_dicts_bits, int flags) {
kvs->bucket_count = 0; kvs->bucket_count = 0;
kvs->overhead_hashtable_lut = 0; kvs->overhead_hashtable_lut = 0;
kvs->overhead_hashtable_rehashing = 0; kvs->overhead_hashtable_rehashing = 0;
return kvs; return kvs;
} }
@ -272,9 +293,13 @@ void kvstoreEmpty(kvstore *kvs, void(callback)(dict*)) {
dict *d = kvstoreGetDict(kvs, didx); dict *d = kvstoreGetDict(kvs, didx);
if (!d) if (!d)
continue; continue;
kvstoreDictMetadata *metadata = (kvstoreDictMetadata *)dictMetadata(d); kvstoreDictMetaBase *metadata = (kvstoreDictMetaBase *)dictMetadata(d);
if (metadata->rehashing_node) if (metadata->rehashing_node)
metadata->rehashing_node = NULL; metadata->rehashing_node = NULL;
if (kvs->flags & KVSTORE_ALLOC_META_KEYS_HIST) {
kvstoreDictMetaEx *metaExt = (kvstoreDictMetaEx *) metadata;
memset(&metaExt->meta.keysizes_hist, 0, sizeof(metaExt->meta.keysizes_hist));
}
dictEmpty(d, callback); dictEmpty(d, callback);
freeDictIfNeeded(kvs, didx); freeDictIfNeeded(kvs, didx);
} }
@ -296,7 +321,7 @@ void kvstoreRelease(kvstore *kvs) {
dict *d = kvstoreGetDict(kvs, didx); dict *d = kvstoreGetDict(kvs, didx);
if (!d) if (!d)
continue; continue;
kvstoreDictMetadata *metadata = (kvstoreDictMetadata *)dictMetadata(d); kvstoreDictMetaBase *metadata = (kvstoreDictMetaBase *)dictMetadata(d);
if (metadata->rehashing_node) if (metadata->rehashing_node)
metadata->rehashing_node = NULL; metadata->rehashing_node = NULL;
dictRelease(d); dictRelease(d);
@ -330,11 +355,15 @@ unsigned long kvstoreBuckets(kvstore *kvs) {
size_t kvstoreMemUsage(kvstore *kvs) { size_t kvstoreMemUsage(kvstore *kvs) {
size_t mem = sizeof(*kvs); size_t mem = sizeof(*kvs);
size_t metaSize = sizeof(kvstoreDictMetaBase);
if (kvs->flags & KVSTORE_ALLOC_META_KEYS_HIST)
metaSize = sizeof(kvstoreDictMetaEx);
unsigned long long keys_count = kvstoreSize(kvs); unsigned long long keys_count = kvstoreSize(kvs);
mem += keys_count * dictEntryMemUsage() + mem += keys_count * dictEntryMemUsage() +
kvstoreBuckets(kvs) * sizeof(dictEntry*) + kvstoreBuckets(kvs) * sizeof(dictEntry*) +
kvs->allocated_dicts * (sizeof(dict) + kvstoreDictMetadataSize(NULL)); kvs->allocated_dicts * (sizeof(dict) + metaSize);
/* Values are dict* shared with kvs->dicts */ /* Values are dict* shared with kvs->dicts */
mem += listLength(kvs->rehashing) * sizeof(listNode); mem += listLength(kvs->rehashing) * sizeof(listNode);
@ -737,12 +766,12 @@ dictEntry *kvstoreDictGetFairRandomKey(kvstore *kvs, int didx)
return dictGetFairRandomKey(d); return dictGetFairRandomKey(d);
} }
dictEntry *kvstoreDictFindEntryByPtrAndHash(kvstore *kvs, int didx, const void *oldptr, uint64_t hash) dictEntry *kvstoreDictFindByHashAndPtr(kvstore *kvs, int didx, const void *oldptr, uint64_t hash)
{ {
dict *d = kvstoreGetDict(kvs, didx); dict *d = kvstoreGetDict(kvs, didx);
if (!d) if (!d)
return NULL; return NULL;
return dictFindEntryByPtrAndHash(d, oldptr, hash); return dictFindByHashAndPtr(d, oldptr, hash);
} }
unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count) unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count)
@ -785,7 +814,7 @@ void kvstoreDictLUTDefrag(kvstore *kvs, kvstoreDictLUTDefragFunction *defragfn)
/* After defragmenting the dict, update its corresponding /* After defragmenting the dict, update its corresponding
* rehashing node in the kvstore's rehashing list. */ * rehashing node in the kvstore's rehashing list. */
kvstoreDictMetadata *metadata = (kvstoreDictMetadata *)dictMetadata(*d); kvstoreDictMetaBase *metadata = (kvstoreDictMetaBase *)dictMetadata(*d);
if (metadata->rehashing_node) if (metadata->rehashing_node)
metadata->rehashing_node->value = *d; metadata->rehashing_node->value = *d;
} }
@ -856,6 +885,19 @@ int kvstoreDictDelete(kvstore *kvs, int didx, const void *key) {
return ret; return ret;
} }
kvstoreDictMetadata *kvstoreGetDictMetadata(kvstore *kvs, int didx) {
dict *d = kvstoreGetDict(kvs, didx);
if ((!d) || (!(kvs->flags & KVSTORE_ALLOC_META_KEYS_HIST)))
return NULL;
kvstoreDictMetaEx *metadata = (kvstoreDictMetaEx *)dictMetadata(d);
return &(metadata->meta);
}
kvstoreMetadata *kvstoreGetMetadata(kvstore *kvs) {
return (kvstoreMetadata *) &kvs->metadata;
}
#ifdef REDIS_TEST #ifdef REDIS_TEST
#include <stdio.h> #include <stdio.h>
#include "testhelp.h" #include "testhelp.h"
@ -1029,7 +1071,8 @@ int kvstoreTest(int argc, char **argv, int flags) {
} }
TEST("Verify non-empty dict count is correctly updated") { TEST("Verify non-empty dict count is correctly updated") {
kvstore *kvs = kvstoreCreate(&KvstoreDictTestType, 2, KVSTORE_ALLOCATE_DICTS_ON_DEMAND); kvstore *kvs = kvstoreCreate(&KvstoreDictTestType, 2,
KVSTORE_ALLOCATE_DICTS_ON_DEMAND | KVSTORE_ALLOC_META_KEYS_HIST);
for (int idx = 0; idx < 4; idx++) { for (int idx = 0; idx < 4; idx++) {
for (i = 0; i < 16; i++) { for (i = 0; i < 16; i++) {
de = kvstoreDictAddRaw(kvs, idx, stringFromInt(i), NULL); de = kvstoreDictAddRaw(kvs, idx, stringFromInt(i), NULL);

View File

@ -4,6 +4,21 @@
#include "dict.h" #include "dict.h"
#include "adlist.h" #include "adlist.h"
/* maximum number of bins of keysizes histogram */
#define MAX_KEYSIZES_BINS 48
#define MAX_KEYSIZES_TYPES 5 /* static_assert at db.c verifies == OBJ_TYPE_BASIC_MAX */
/* When creating kvstore with flag `KVSTORE_ALLOC_META_KEYS_HIST`, then kvstore
* alloc and memset struct kvstoreMetadata on init, yet, managed outside kvstore */
typedef struct {
uint64_t keysizes_hist[MAX_KEYSIZES_TYPES][MAX_KEYSIZES_BINS];
} kvstoreMetadata;
/* Like kvstoreMetadata, this one per dict */
typedef struct {
uint64_t keysizes_hist[MAX_KEYSIZES_TYPES][MAX_KEYSIZES_BINS];
} kvstoreDictMetadata;
typedef struct _kvstore kvstore; typedef struct _kvstore kvstore;
typedef struct _kvstoreIterator kvstoreIterator; typedef struct _kvstoreIterator kvstoreIterator;
typedef struct _kvstoreDictIterator kvstoreDictIterator; typedef struct _kvstoreDictIterator kvstoreDictIterator;
@ -13,6 +28,7 @@ typedef int (kvstoreExpandShouldSkipDictIndex)(int didx);
#define KVSTORE_ALLOCATE_DICTS_ON_DEMAND (1<<0) #define KVSTORE_ALLOCATE_DICTS_ON_DEMAND (1<<0)
#define KVSTORE_FREE_EMPTY_DICTS (1<<1) #define KVSTORE_FREE_EMPTY_DICTS (1<<1)
#define KVSTORE_ALLOC_META_KEYS_HIST (1<<2) /* Alloc keysizes histogram */
kvstore *kvstoreCreate(dictType *type, int num_dicts_bits, int flags); kvstore *kvstoreCreate(dictType *type, int num_dicts_bits, int flags);
void kvstoreEmpty(kvstore *kvs, void(callback)(dict*)); void kvstoreEmpty(kvstore *kvs, void(callback)(dict*));
void kvstoreRelease(kvstore *kvs); void kvstoreRelease(kvstore *kvs);
@ -57,7 +73,7 @@ void kvstoreReleaseDictIterator(kvstoreDictIterator *kvs_id);
dictEntry *kvstoreDictIteratorNext(kvstoreDictIterator *kvs_di); dictEntry *kvstoreDictIteratorNext(kvstoreDictIterator *kvs_di);
dictEntry *kvstoreDictGetRandomKey(kvstore *kvs, int didx); dictEntry *kvstoreDictGetRandomKey(kvstore *kvs, int didx);
dictEntry *kvstoreDictGetFairRandomKey(kvstore *kvs, int didx); dictEntry *kvstoreDictGetFairRandomKey(kvstore *kvs, int didx);
dictEntry *kvstoreDictFindEntryByPtrAndHash(kvstore *kvs, int didx, const void *oldptr, uint64_t hash); dictEntry *kvstoreDictFindByHashAndPtr(kvstore *kvs, int didx, const void *oldptr, uint64_t hash);
unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count); unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count);
int kvstoreDictExpand(kvstore *kvs, int didx, unsigned long size); int kvstoreDictExpand(kvstore *kvs, int didx, unsigned long size);
unsigned long kvstoreDictScanDefrag(kvstore *kvs, int didx, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata); unsigned long kvstoreDictScanDefrag(kvstore *kvs, int didx, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata);
@ -71,6 +87,8 @@ void kvstoreDictSetVal(kvstore *kvs, int didx, dictEntry *de, void *val);
dictEntry *kvstoreDictTwoPhaseUnlinkFind(kvstore *kvs, int didx, const void *key, dictEntry ***plink, int *table_index); dictEntry *kvstoreDictTwoPhaseUnlinkFind(kvstore *kvs, int didx, const void *key, dictEntry ***plink, int *table_index);
void kvstoreDictTwoPhaseUnlinkFree(kvstore *kvs, int didx, dictEntry *he, dictEntry **plink, int table_index); void kvstoreDictTwoPhaseUnlinkFree(kvstore *kvs, int didx, dictEntry *he, dictEntry **plink, int table_index);
int kvstoreDictDelete(kvstore *kvs, int didx, const void *key); int kvstoreDictDelete(kvstore *kvs, int didx, const void *key);
kvstoreDictMetadata *kvstoreGetDictMetadata(kvstore *kvs, int didx);
kvstoreMetadata *kvstoreGetMetadata(kvstore *kvs);
#ifdef REDIS_TEST #ifdef REDIS_TEST
int kvstoreTest(int argc, char *argv[], int flags); int kvstoreTest(int argc, char *argv[], int flags);

View File

@ -203,7 +203,7 @@ sds createLatencyReport(void) {
if (dictSize(server.latency_events) == 0 && if (dictSize(server.latency_events) == 0 &&
server.latency_monitor_threshold == 0) server.latency_monitor_threshold == 0)
{ {
report = sdscat(report,"I'm sorry, Dave, I can't do that. Latency monitoring is disabled in this Redis instance. You may use \"CONFIG SET latency-monitor-threshold <milliseconds>.\" in order to enable it. If we weren't in a deep space mission I'd suggest to take a look at https://redis.io/topics/latency-monitor.\n"); report = sdscat(report,"I'm sorry, Dave, I can't do that. Latency monitoring is disabled in this Redis instance. You may use \"CONFIG SET latency-monitor-threshold <milliseconds>.\" in order to enable it. If we weren't in a deep space mission I'd suggest to take a look at https://redis.io/docs/latest/operate/oss_and_stack/management/optimization/latency-monitor.\n");
return report; return report;
} }

View File

@ -207,7 +207,7 @@ void emptyDbAsync(redisDb *db) {
} }
kvstore *oldkeys = db->keys, *oldexpires = db->expires; kvstore *oldkeys = db->keys, *oldexpires = db->expires;
ebuckets oldHfe = db->hexpires; ebuckets oldHfe = db->hexpires;
db->keys = kvstoreCreate(&dbDictType, slot_count_bits, flags); db->keys = kvstoreCreate(&dbDictType, slot_count_bits, flags | KVSTORE_ALLOC_META_KEYS_HIST);
db->expires = kvstoreCreate(&dbExpiresDictType, slot_count_bits, flags); db->expires = kvstoreCreate(&dbExpiresDictType, slot_count_bits, flags);
db->hexpires = ebCreate(); db->hexpires = ebCreate();
atomicIncr(lazyfree_objects, kvstoreSize(oldkeys)); atomicIncr(lazyfree_objects, kvstoreSize(oldkeys));

View File

@ -231,6 +231,11 @@ void lpFree(unsigned char *lp) {
lp_free(lp); lp_free(lp);
} }
/* Generic version of lpFree. */
void lpFreeGeneric(void *lp) {
lp_free((unsigned char *)lp);
}
/* Shrink the memory to fit. */ /* Shrink the memory to fit. */
unsigned char* lpShrinkToFit(unsigned char *lp) { unsigned char* lpShrinkToFit(unsigned char *lp) {
size_t size = lpGetTotalBytes(lp); size_t size = lpGetTotalBytes(lp);
@ -369,6 +374,23 @@ static inline unsigned long lpEncodeBacklen(unsigned char *buf, uint64_t l) {
} }
} }
/* Calculate the number of bytes required to reverse-encode a variable length
* field representing the length of the previous element of size 'l', ranging
* from 1 to 5. */
static inline unsigned long lpEncodeBacklenBytes(uint64_t l) {
if (l <= 127) {
return 1;
} else if (l < 16383) {
return 2;
} else if (l < 2097151) {
return 3;
} else if (l < 268435455) {
return 4;
} else {
return 5;
}
}
/* Decode the backlen and returns it. If the encoding looks invalid (more than /* Decode the backlen and returns it. If the encoding looks invalid (more than
* 5 bytes are used), UINT64_MAX is returned to report the problem. */ * 5 bytes are used), UINT64_MAX is returned to report the problem. */
static inline uint64_t lpDecodeBacklen(unsigned char *p) { static inline uint64_t lpDecodeBacklen(unsigned char *p) {
@ -431,17 +453,17 @@ static inline uint32_t lpCurrentEncodedSizeUnsafe(unsigned char *p) {
* This includes just the encoding byte, and the bytes needed to encode the length * This includes just the encoding byte, and the bytes needed to encode the length
* of the element (excluding the element data itself) * of the element (excluding the element data itself)
* If the element encoding is wrong then 0 is returned. */ * If the element encoding is wrong then 0 is returned. */
static inline uint32_t lpCurrentEncodedSizeBytes(unsigned char *p) { static inline uint32_t lpCurrentEncodedSizeBytes(const unsigned char encoding) {
if (LP_ENCODING_IS_7BIT_UINT(p[0])) return 1; if (LP_ENCODING_IS_7BIT_UINT(encoding)) return 1;
if (LP_ENCODING_IS_6BIT_STR(p[0])) return 1; if (LP_ENCODING_IS_6BIT_STR(encoding)) return 1;
if (LP_ENCODING_IS_13BIT_INT(p[0])) return 1; if (LP_ENCODING_IS_13BIT_INT(encoding)) return 1;
if (LP_ENCODING_IS_16BIT_INT(p[0])) return 1; if (LP_ENCODING_IS_16BIT_INT(encoding)) return 1;
if (LP_ENCODING_IS_24BIT_INT(p[0])) return 1; if (LP_ENCODING_IS_24BIT_INT(encoding)) return 1;
if (LP_ENCODING_IS_32BIT_INT(p[0])) return 1; if (LP_ENCODING_IS_32BIT_INT(encoding)) return 1;
if (LP_ENCODING_IS_64BIT_INT(p[0])) return 1; if (LP_ENCODING_IS_64BIT_INT(encoding)) return 1;
if (LP_ENCODING_IS_12BIT_STR(p[0])) return 2; if (LP_ENCODING_IS_12BIT_STR(encoding)) return 2;
if (LP_ENCODING_IS_32BIT_STR(p[0])) return 5; if (LP_ENCODING_IS_32BIT_STR(encoding)) return 5;
if (p[0] == LP_EOF) return 1; if (encoding == LP_EOF) return 1;
return 0; return 0;
} }
@ -449,13 +471,22 @@ static inline uint32_t lpCurrentEncodedSizeBytes(unsigned char *p) {
* function if the current element is the EOF element at the end of the * function if the current element is the EOF element at the end of the
* listpack, however, while this function is used to implement lpNext(), * listpack, however, while this function is used to implement lpNext(),
* it does not return NULL when the EOF element is encountered. */ * it does not return NULL when the EOF element is encountered. */
unsigned char *lpSkip(unsigned char *p) { static inline unsigned char *lpSkip(unsigned char *p) {
unsigned long entrylen = lpCurrentEncodedSizeUnsafe(p); unsigned long entrylen = lpCurrentEncodedSizeUnsafe(p);
entrylen += lpEncodeBacklen(NULL,entrylen); entrylen += lpEncodeBacklenBytes(entrylen);
p += entrylen; p += entrylen;
return p; return p;
} }
/* This is similar to lpNext() but avoids the inner call to lpBytes when you already know the listpack size. */
unsigned char *lpNextWithBytes(unsigned char *lp, unsigned char *p, const size_t lpbytes) {
assert(p);
p = lpSkip(p);
if (p[0] == LP_EOF) return NULL;
lpAssertValidEntry(lp, lpbytes, p);
return p;
}
/* If 'p' points to an element of the listpack, calling lpNext() will return /* If 'p' points to an element of the listpack, calling lpNext() will return
* the pointer to the next element (the one on the right), or NULL if 'p' * the pointer to the next element (the one on the right), or NULL if 'p'
* already pointed to the last element of the listpack. */ * already pointed to the last element of the listpack. */
@ -475,7 +506,7 @@ unsigned char *lpPrev(unsigned char *lp, unsigned char *p) {
if (p-lp == LP_HDR_SIZE) return NULL; if (p-lp == LP_HDR_SIZE) return NULL;
p--; /* Seek the first backlen byte of the last element. */ p--; /* Seek the first backlen byte of the last element. */
uint64_t prevlen = lpDecodeBacklen(p); uint64_t prevlen = lpDecodeBacklen(p);
prevlen += lpEncodeBacklen(NULL,prevlen); prevlen += lpEncodeBacklenBytes(prevlen);
p -= prevlen-1; /* Seek the first byte of the previous entry. */ p -= prevlen-1; /* Seek the first byte of the previous entry. */
lpAssertValidEntry(lp, lpBytes(lp), p); lpAssertValidEntry(lp, lpBytes(lp), p);
return p; return p;
@ -569,7 +600,7 @@ static inline unsigned char *lpGetWithSize(unsigned char *p, int64_t *count, uns
if (entry_size) *entry_size = LP_ENCODING_7BIT_UINT_ENTRY_SIZE; if (entry_size) *entry_size = LP_ENCODING_7BIT_UINT_ENTRY_SIZE;
} else if (LP_ENCODING_IS_6BIT_STR(p[0])) { } else if (LP_ENCODING_IS_6BIT_STR(p[0])) {
*count = LP_ENCODING_6BIT_STR_LEN(p); *count = LP_ENCODING_6BIT_STR_LEN(p);
if (entry_size) *entry_size = 1 + *count + lpEncodeBacklen(NULL, *count + 1); if (entry_size) *entry_size = 1 + *count + lpEncodeBacklenBytes(*count + 1);
return p+1; return p+1;
} else if (LP_ENCODING_IS_13BIT_INT(p[0])) { } else if (LP_ENCODING_IS_13BIT_INT(p[0])) {
uval = ((p[0]&0x1f)<<8) | p[1]; uval = ((p[0]&0x1f)<<8) | p[1];
@ -611,11 +642,11 @@ static inline unsigned char *lpGetWithSize(unsigned char *p, int64_t *count, uns
if (entry_size) *entry_size = LP_ENCODING_64BIT_INT_ENTRY_SIZE; if (entry_size) *entry_size = LP_ENCODING_64BIT_INT_ENTRY_SIZE;
} else if (LP_ENCODING_IS_12BIT_STR(p[0])) { } else if (LP_ENCODING_IS_12BIT_STR(p[0])) {
*count = LP_ENCODING_12BIT_STR_LEN(p); *count = LP_ENCODING_12BIT_STR_LEN(p);
if (entry_size) *entry_size = 2 + *count + lpEncodeBacklen(NULL, *count + 2); if (entry_size) *entry_size = 2 + *count + lpEncodeBacklenBytes(*count + 2);
return p+2; return p+2;
} else if (LP_ENCODING_IS_32BIT_STR(p[0])) { } else if (LP_ENCODING_IS_32BIT_STR(p[0])) {
*count = LP_ENCODING_32BIT_STR_LEN(p); *count = LP_ENCODING_32BIT_STR_LEN(p);
if (entry_size) *entry_size = 5 + *count + lpEncodeBacklen(NULL, *count + 5); if (entry_size) *entry_size = 5 + *count + lpEncodeBacklenBytes(*count + 5);
return p+5; return p+5;
} else { } else {
uval = 12345678900000000ULL + p[0]; uval = 12345678900000000ULL + p[0];
@ -647,8 +678,99 @@ static inline unsigned char *lpGetWithSize(unsigned char *p, int64_t *count, uns
} }
} }
/* Return the listpack element pointed by 'p'.
*
* The function has the same behaviour as lpGetWithSize when 'entry_size' is NULL,
* but avoids a lot of unecesarry branching performance penalties. */
static inline unsigned char *lpGetWithBuf(unsigned char *p, int64_t *count, unsigned char *intbuf) {
int64_t val;
uint64_t uval, negstart, negmax;
assert(p); /* assertion for valgrind (avoid NPD) */
const unsigned char encoding = p[0];
/* string encoding */
if (LP_ENCODING_IS_6BIT_STR(encoding)) {
*count = LP_ENCODING_6BIT_STR_LEN(p);
return p+1;
}
if (LP_ENCODING_IS_12BIT_STR(encoding)) {
*count = LP_ENCODING_12BIT_STR_LEN(p);
return p+2;
}
if (LP_ENCODING_IS_32BIT_STR(encoding)) {
*count = LP_ENCODING_32BIT_STR_LEN(p);
return p+5;
}
/* int encoding */
if (LP_ENCODING_IS_7BIT_UINT(encoding)) {
negstart = UINT64_MAX; /* 7 bit ints are always positive. */
negmax = 0;
uval = encoding & 0x7f;
} else if (LP_ENCODING_IS_13BIT_INT(encoding)) {
uval = ((encoding&0x1f)<<8) | p[1];
negstart = (uint64_t)1<<12;
negmax = 8191;
} else if (LP_ENCODING_IS_16BIT_INT(encoding)) {
uval = (uint64_t)p[1] |
(uint64_t)p[2]<<8;
negstart = (uint64_t)1<<15;
negmax = UINT16_MAX;
} else if (LP_ENCODING_IS_24BIT_INT(encoding)) {
uval = (uint64_t)p[1] |
(uint64_t)p[2]<<8 |
(uint64_t)p[3]<<16;
negstart = (uint64_t)1<<23;
negmax = UINT32_MAX>>8;
} else if (LP_ENCODING_IS_32BIT_INT(encoding)) {
uval = (uint64_t)p[1] |
(uint64_t)p[2]<<8 |
(uint64_t)p[3]<<16 |
(uint64_t)p[4]<<24;
negstart = (uint64_t)1<<31;
negmax = UINT32_MAX;
} else if (LP_ENCODING_IS_64BIT_INT(encoding)) {
uval = (uint64_t)p[1] |
(uint64_t)p[2]<<8 |
(uint64_t)p[3]<<16 |
(uint64_t)p[4]<<24 |
(uint64_t)p[5]<<32 |
(uint64_t)p[6]<<40 |
(uint64_t)p[7]<<48 |
(uint64_t)p[8]<<56;
negstart = (uint64_t)1<<63;
negmax = UINT64_MAX;
} else {
uval = 12345678900000000ULL + encoding;
negstart = UINT64_MAX;
negmax = 0;
}
/* We reach this code path only for integer encodings.
* Convert the unsigned value to the signed one using two's complement
* rule. */
if (uval >= negstart) {
/* This three steps conversion should avoid undefined behaviors
* in the unsigned -> signed conversion. */
uval = negmax-uval;
val = uval;
val = -val-1;
} else {
val = uval;
}
/* Return the string representation of the integer or the value itself
* depending on intbuf being NULL or not. */
if (intbuf) {
*count = ll2string((char*)intbuf,LP_INTBUF_SIZE,(long long)val);
return intbuf;
} else {
*count = val;
return NULL;
}
}
unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf) { unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf) {
return lpGetWithSize(p, count, intbuf, NULL); return lpGetWithBuf(p, count, intbuf);
} }
/* This is just a wrapper to lpGet() that is able to get entry value directly. /* This is just a wrapper to lpGet() that is able to get entry value directly.
@ -880,7 +1002,7 @@ unsigned char *lpInsert(unsigned char *lp, unsigned char *elestr, unsigned char
uint32_t replaced_len = 0; uint32_t replaced_len = 0;
if (where == LP_REPLACE) { if (where == LP_REPLACE) {
replaced_len = lpCurrentEncodedSizeUnsafe(p); replaced_len = lpCurrentEncodedSizeUnsafe(p);
replaced_len += lpEncodeBacklen(NULL,replaced_len); replaced_len += lpEncodeBacklenBytes(replaced_len);
ASSERT_INTEGRITY_LEN(lp, p, replaced_len); ASSERT_INTEGRITY_LEN(lp, p, replaced_len);
} }
@ -1420,7 +1542,7 @@ size_t lpBytes(unsigned char *lp) {
size_t lpEntrySizeInteger(long long lval) { size_t lpEntrySizeInteger(long long lval) {
uint64_t enclen; uint64_t enclen;
lpEncodeIntegerGetType(lval, NULL, &enclen); lpEncodeIntegerGetType(lval, NULL, &enclen);
unsigned long backlen = lpEncodeBacklen(NULL, enclen); unsigned long backlen = lpEncodeBacklenBytes(enclen);
return enclen + backlen; return enclen + backlen;
} }
@ -1487,6 +1609,7 @@ unsigned char *lpValidateFirst(unsigned char *lp) {
/* Validate the integrity of a single listpack entry and move to the next one. /* Validate the integrity of a single listpack entry and move to the next one.
* The input argument 'pp' is a reference to the current record and is advanced on exit. * The input argument 'pp' is a reference to the current record and is advanced on exit.
* the data pointed to by 'lp' will not be modified by the function.
* Returns 1 if valid, 0 if invalid. */ * Returns 1 if valid, 0 if invalid. */
int lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes) { int lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes) {
#define OUT_OF_RANGE(p) ( \ #define OUT_OF_RANGE(p) ( \
@ -1506,7 +1629,7 @@ int lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes) {
} }
/* check that we can read the encoded size */ /* check that we can read the encoded size */
uint32_t lenbytes = lpCurrentEncodedSizeBytes(p); uint32_t lenbytes = lpCurrentEncodedSizeBytes(p[0]);
if (!lenbytes) if (!lenbytes)
return 0; return 0;
@ -1516,7 +1639,7 @@ int lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes) {
/* get the entry length and encoded backlen. */ /* get the entry length and encoded backlen. */
unsigned long entrylen = lpCurrentEncodedSizeUnsafe(p); unsigned long entrylen = lpCurrentEncodedSizeUnsafe(p);
unsigned long encodedBacklen = lpEncodeBacklen(NULL,entrylen); unsigned long encodedBacklen = lpEncodeBacklenBytes(entrylen);
entrylen += encodedBacklen; entrylen += encodedBacklen;
/* make sure the entry doesn't reach outside the edge of the listpack */ /* make sure the entry doesn't reach outside the edge of the listpack */
@ -1859,9 +1982,9 @@ void lpRepr(unsigned char *lp) {
p = lpFirst(lp); p = lpFirst(lp);
while(p) { while(p) {
uint32_t encoded_size_bytes = lpCurrentEncodedSizeBytes(p); uint32_t encoded_size_bytes = lpCurrentEncodedSizeBytes(p[0]);
uint32_t encoded_size = lpCurrentEncodedSizeUnsafe(p); uint32_t encoded_size = lpCurrentEncodedSizeUnsafe(p);
unsigned long back_len = lpEncodeBacklen(NULL, encoded_size); unsigned long back_len = lpEncodeBacklenBytes(encoded_size);
printf( printf(
"{\n" "{\n"
"\taddr: 0x%08lx,\n" "\taddr: 0x%08lx,\n"
@ -3002,7 +3125,7 @@ int listpackTest(int argc, char *argv[], int flags) {
for (i = 0; i < iteration; i++) { for (i = 0; i < iteration; i++) {
lp = lpNew(0); lp = lpNew(0);
ref = listCreate(); ref = listCreate();
listSetFreeMethod(ref,(void (*)(void*))sdsfree); listSetFreeMethod(ref, sdsfreegeneric);
len = rand() % 256; len = rand() % 256;
/* Create lists */ /* Create lists */

View File

@ -35,6 +35,7 @@ typedef struct {
unsigned char *lpNew(size_t capacity); unsigned char *lpNew(size_t capacity);
void lpFree(unsigned char *lp); void lpFree(unsigned char *lp);
void lpFreeGeneric(void *lp);
unsigned char* lpShrinkToFit(unsigned char *lp); unsigned char* lpShrinkToFit(unsigned char *lp);
unsigned char *lpInsertString(unsigned char *lp, unsigned char *s, uint32_t slen, unsigned char *lpInsertString(unsigned char *lp, unsigned char *s, uint32_t slen,
unsigned char *p, int where, unsigned char **newp); unsigned char *p, int where, unsigned char **newp);
@ -65,6 +66,7 @@ unsigned char *lpFindCb(unsigned char *lp, unsigned char *p, void *user, lpCmp c
unsigned char *lpFirst(unsigned char *lp); unsigned char *lpFirst(unsigned char *lp);
unsigned char *lpLast(unsigned char *lp); unsigned char *lpLast(unsigned char *lp);
unsigned char *lpNext(unsigned char *lp, unsigned char *p); unsigned char *lpNext(unsigned char *lp, unsigned char *p);
unsigned char *lpNextWithBytes(unsigned char *lp, unsigned char *p, const size_t lpbytes);
unsigned char *lpPrev(unsigned char *lp, unsigned char *p); unsigned char *lpPrev(unsigned char *lp, unsigned char *p);
size_t lpBytes(unsigned char *lp); size_t lpBytes(unsigned char *lp);
size_t lpEntrySizeInteger(long long lval); size_t lpEntrySizeInteger(long long lval);

View File

@ -437,7 +437,13 @@ typedef int (*RedisModuleConfigApplyFunc)(RedisModuleCtx *ctx, void *privdata, R
/* Struct representing a module config. These are stored in a list in the module struct */ /* Struct representing a module config. These are stored in a list in the module struct */
struct ModuleConfig { struct ModuleConfig {
sds name; /* Name of config without the module name appended to the front */ sds name; /* Fullname of the config (as it appears in the config file) */
sds alias; /* Optional alias for the configuration. NULL if none exists */
int unprefixedFlag; /* Indicates if the REDISMODULE_CONFIG_UNPREFIXED flag was set.
* If the configuration name was prefixed,during get_fn/set_fn
* callbacks, it should be reported without the prefix */
void *privdata; /* Optional data passed into the module config callbacks */ void *privdata; /* Optional data passed into the module config callbacks */
union get_fn { /* The get callback specified by the module */ union get_fn { /* The get callback specified by the module */
RedisModuleConfigGetStringFunc get_string; RedisModuleConfigGetStringFunc get_string;
@ -658,7 +664,7 @@ void moduleReleaseTempClient(client *c) {
c->bufpos = 0; c->bufpos = 0;
c->flags = CLIENT_MODULE; c->flags = CLIENT_MODULE;
c->user = NULL; /* Root user */ c->user = NULL; /* Root user */
c->cmd = c->lastcmd = c->realcmd = NULL; c->cmd = c->lastcmd = c->realcmd = c->iolookedcmd = NULL;
if (c->bstate.async_rm_call_handle) { if (c->bstate.async_rm_call_handle) {
RedisModuleAsyncRMCallPromise *promise = c->bstate.async_rm_call_handle; RedisModuleAsyncRMCallPromise *promise = c->bstate.async_rm_call_handle;
promise->c = NULL; /* Remove the client from the promise so it will no longer be possible to abort it. */ promise->c = NULL; /* Remove the client from the promise so it will no longer be possible to abort it. */
@ -983,7 +989,7 @@ int moduleGetCommandChannelsViaAPI(struct redisCommand *cmd, robj **argv, int ar
* *
* These functions are used to implement custom Redis commands. * These functions are used to implement custom Redis commands.
* *
* For examples, see https://redis.io/topics/modules-intro. * For examples, see https://redis.io/docs/latest/develop/reference/modules/.
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
/* Return non-zero if a module command, that was declared with the /* Return non-zero if a module command, that was declared with the
@ -1197,7 +1203,7 @@ RedisModuleCommand *moduleCreateCommandProxy(struct RedisModule *module, sds dec
* from the same input arguments and key values. * from the same input arguments and key values.
* Starting from Redis 7.0 this flag has been deprecated. * Starting from Redis 7.0 this flag has been deprecated.
* Declaring a command as "random" can be done using * Declaring a command as "random" can be done using
* command tips, see https://redis.io/topics/command-tips. * command tips, see https://redis.io/docs/latest/develop/reference/command-tips/.
* * **"allow-stale"**: The command is allowed to run on slaves that don't * * **"allow-stale"**: The command is allowed to run on slaves that don't
* serve stale data. Don't use if you don't know what * serve stale data. Don't use if you don't know what
* this means. * this means.
@ -1270,8 +1276,11 @@ int RM_CreateCommand(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc c
RedisModuleCommand *cp = moduleCreateCommandProxy(ctx->module, declared_name, sdsdup(declared_name), cmdfunc, flags, firstkey, lastkey, keystep); RedisModuleCommand *cp = moduleCreateCommandProxy(ctx->module, declared_name, sdsdup(declared_name), cmdfunc, flags, firstkey, lastkey, keystep);
cp->rediscmd->arity = cmdfunc ? -1 : -2; /* Default value, can be changed later via dedicated API */ cp->rediscmd->arity = cmdfunc ? -1 : -2; /* Default value, can be changed later via dedicated API */
pauseAllIOThreads();
serverAssert(dictAdd(server.commands, sdsdup(declared_name), cp->rediscmd) == DICT_OK); serverAssert(dictAdd(server.commands, sdsdup(declared_name), cp->rediscmd) == DICT_OK);
serverAssert(dictAdd(server.orig_commands, sdsdup(declared_name), cp->rediscmd) == DICT_OK); serverAssert(dictAdd(server.orig_commands, sdsdup(declared_name), cp->rediscmd) == DICT_OK);
resumeAllIOThreads();
cp->rediscmd->id = ACLGetCommandID(declared_name); /* ID used for ACL. */ cp->rediscmd->id = ACLGetCommandID(declared_name); /* ID used for ACL. */
return REDISMODULE_OK; return REDISMODULE_OK;
} }
@ -1587,7 +1596,7 @@ int RM_SetCommandACLCategories(RedisModuleCommand *command, const char *aclflags
* both strings set to NULL. * both strings set to NULL.
* *
* - `tips`: A string of space-separated tips regarding this command, meant for * - `tips`: A string of space-separated tips regarding this command, meant for
* clients and proxies. See https://redis.io/topics/command-tips. * clients and proxies. See https://redis.io/docs/latest/develop/reference/command-tips/.
* *
* - `arity`: Number of arguments, including the command name itself. A positive * - `arity`: Number of arguments, including the command name itself. A positive
* number specifies an exact number of arguments and a negative number * number specifies an exact number of arguments and a negative number
@ -2253,12 +2262,16 @@ int moduleIsModuleCommand(void *module_handle, struct redisCommand *cmd) {
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
int moduleListConfigMatch(void *config, void *name) { int moduleListConfigMatch(void *config, void *name) {
return strcasecmp(((ModuleConfig *) config)->name, (char *) name) == 0; ModuleConfig *mc = (ModuleConfig *) config;
/* Compare the provided name with the config's name and alias if it exists */
return strcasecmp(mc->name, (char *) name) == 0 ||
((mc->alias) && strcasecmp(mc->alias, (char *) name) == 0);
} }
void moduleListFree(void *config) { void moduleListFree(void *config) {
ModuleConfig *module_config = (ModuleConfig *) config; ModuleConfig *module_config = (ModuleConfig *) config;
sdsfree(module_config->name); sdsfree(module_config->name);
sdsfree(module_config->alias);
zfree(config); zfree(config);
} }
@ -4171,15 +4184,7 @@ int RM_KeyType(RedisModuleKey *key) {
* If the key pointer is NULL or the key is empty, zero is returned. */ * If the key pointer is NULL or the key is empty, zero is returned. */
size_t RM_ValueLength(RedisModuleKey *key) { size_t RM_ValueLength(RedisModuleKey *key) {
if (key == NULL || key->value == NULL) return 0; if (key == NULL || key->value == NULL) return 0;
switch(key->value->type) { return getObjectLength(key->value);
case OBJ_STRING: return stringObjectLen(key->value);
case OBJ_LIST: return listTypeLength(key->value);
case OBJ_SET: return setTypeSize(key->value);
case OBJ_ZSET: return zsetLength(key->value);
case OBJ_HASH: return hashTypeLength(key->value, 0); /* OPEN: To subtract expired fields? */
case OBJ_STREAM: return streamLength(key->value);
default: return 0;
}
} }
/* If the key is open for writing, remove it, and setup the key to /* If the key is open for writing, remove it, and setup the key to
@ -5356,7 +5361,12 @@ int RM_HashSet(RedisModuleKey *key, int flags, ...) {
* expecting a RedisModuleString pointer to pointer, the function just * expecting a RedisModuleString pointer to pointer, the function just
* reports if the field exists or not and expects an integer pointer * reports if the field exists or not and expects an integer pointer
* as the second element of each pair. * as the second element of each pair.
* *
* REDISMODULE_HASH_EXPIRE_TIME: retrieves the expiration time of a field in the hash.
* The function expects a `mstime_t` pointer as the second element of each pair.
* If the field does not exist or has no expiration, the value is set to
* `REDISMODULE_NO_EXPIRE`. This flag must not be used with `REDISMODULE_HASH_EXISTS`.
*
* Example of REDISMODULE_HASH_CFIELDS: * Example of REDISMODULE_HASH_CFIELDS:
* *
* RedisModuleString *username, *hashedpass; * RedisModuleString *username, *hashedpass;
@ -5365,8 +5375,13 @@ int RM_HashSet(RedisModuleKey *key, int flags, ...) {
* Example of REDISMODULE_HASH_EXISTS: * Example of REDISMODULE_HASH_EXISTS:
* *
* int exists; * int exists;
* RedisModule_HashGet(mykey,REDISMODULE_HASH_EXISTS,argv[1],&exists,NULL); * RedisModule_HashGet(mykey,REDISMODULE_HASH_EXISTS,"username",&exists,NULL);
* *
* Example of REDISMODULE_HASH_EXPIRE_TIME:
*
* mstime_t hpExpireTime;
* RedisModule_HashGet(mykey,REDISMODULE_HASH_EXPIRE_TIME,"hp",&hpExpireTime,NULL);
*
* The function returns REDISMODULE_OK on success and REDISMODULE_ERR if * The function returns REDISMODULE_OK on success and REDISMODULE_ERR if
* the key is not a hash value. * the key is not a hash value.
* *
@ -5383,6 +5398,10 @@ int RM_HashGet(RedisModuleKey *key, int flags, ...) {
if (key->mode & REDISMODULE_OPEN_KEY_ACCESS_EXPIRED) if (key->mode & REDISMODULE_OPEN_KEY_ACCESS_EXPIRED)
hfeFlags = HFE_LAZY_ACCESS_EXPIRED; /* allow read also expired fields */ hfeFlags = HFE_LAZY_ACCESS_EXPIRED; /* allow read also expired fields */
/* Verify flag HASH_EXISTS is not set together with HASH_EXPIRE_TIME */
if ((flags & REDISMODULE_HASH_EXISTS) && (flags & REDISMODULE_HASH_EXPIRE_TIME))
return REDISMODULE_ERR;
va_start(ap, flags); va_start(ap, flags);
while(1) { while(1) {
RedisModuleString *field, **valueptr; RedisModuleString *field, **valueptr;
@ -5405,11 +5424,22 @@ int RM_HashGet(RedisModuleKey *key, int flags, ...) {
} else { } else {
*existsptr = 0; *existsptr = 0;
} }
} else if (flags & REDISMODULE_HASH_EXPIRE_TIME) {
mstime_t *expireptr = va_arg(ap,mstime_t*);
*expireptr = REDISMODULE_NO_EXPIRE;
if (key->value) {
uint64_t expireTime = 0;
/* As an opt, avoid fetching value, only expire time */
int res = hashTypeGetValueObject(key->db, key->value, field->ptr,
hfeFlags, NULL, &expireTime, NULL);
/* If field has expiration time */
if (res && expireTime != 0) *expireptr = expireTime;
}
} else { } else {
valueptr = va_arg(ap,RedisModuleString**); valueptr = va_arg(ap,RedisModuleString**);
if (key->value) { if (key->value) {
*valueptr = hashTypeGetValueObject(key->db, key->value, field->ptr, hashTypeGetValueObject(key->db, key->value, field->ptr,
hfeFlags, NULL); hfeFlags, valueptr, NULL, NULL);
if (*valueptr) { if (*valueptr) {
robj *decoded = getDecodedObject(*valueptr); robj *decoded = getDecodedObject(*valueptr);
@ -5430,10 +5460,27 @@ int RM_HashGet(RedisModuleKey *key, int flags, ...) {
return REDISMODULE_OK; return REDISMODULE_OK;
} }
/**
* Retrieves the minimum expiration time of fields in a hash.
*
* Return:
* - The minimum expiration time (in milliseconds) of the hash fields if at
* least one field has an expiration set.
* - REDISMODULE_NO_EXPIRE if no fields have an expiration set or if the key
* is not a hash.
*/
mstime_t RM_HashFieldMinExpire(RedisModuleKey *key) {
if ((!key->value) || (key->value->type != OBJ_HASH))
return REDISMODULE_NO_EXPIRE;
mstime_t min = hashTypeGetMinExpire(key->value, 1);
return (min == EB_EXPIRE_TIME_INVALID) ? REDISMODULE_NO_EXPIRE : min;
}
/* -------------------------------------------------------------------------- /* --------------------------------------------------------------------------
* ## Key API for Stream type * ## Key API for Stream type
* *
* For an introduction to streams, see https://redis.io/topics/streams-intro. * For an introduction to streams, see https://redis.io/docs/latest/develop/data-types/streams/.
* *
* The type RedisModuleStreamID, which is used in stream functions, is a struct * The type RedisModuleStreamID, which is used in stream functions, is a struct
* with two 64-bit fields and is defined as * with two 64-bit fields and is defined as
@ -6306,7 +6353,7 @@ fmterr:
* // Do something with myval. * // Do something with myval.
* } * }
* *
* This API is documented here: https://redis.io/topics/modules-intro * This API is documented here: https://redis.io/docs/latest/develop/reference/modules/
*/ */
RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) { RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) {
client *c = NULL; client *c = NULL;
@ -6816,7 +6863,7 @@ robj *moduleTypeDupOrReply(client *c, robj *fromkey, robj *tokey, int todb, robj
/* Register a new data type exported by the module. The parameters are the /* Register a new data type exported by the module. The parameters are the
* following. Please for in depth documentation check the modules API * following. Please for in depth documentation check the modules API
* documentation, especially https://redis.io/topics/modules-native-types. * documentation, especially https://redis.io/docs/latest/develop/reference/modules/modules-native-types/.
* *
* * **name**: A 9 characters data type name that MUST be unique in the Redis * * **name**: A 9 characters data type name that MUST be unique in the Redis
* Modules ecosystem. Be creative... and there will be no collisions. Use * Modules ecosystem. Be creative... and there will be no collisions. Use
@ -7705,7 +7752,7 @@ void RM_LatencyAddSample(const char *event, mstime_t latency) {
* ## Blocking clients from modules * ## Blocking clients from modules
* *
* For a guide about blocking commands in modules, see * For a guide about blocking commands in modules, see
* https://redis.io/topics/modules-blocking-ops. * https://redis.io/docs/latest/develop/reference/modules/modules-blocking-ops/.
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
/* Returns 1 if the client already in the moduleUnblocked list, 0 otherwise. */ /* Returns 1 if the client already in the moduleUnblocked list, 0 otherwise. */
@ -8717,7 +8764,7 @@ void moduleReleaseGIL(void) {
* runs is dangerous and discouraged. In order to react to key space events with * runs is dangerous and discouraged. In order to react to key space events with
* write actions, please refer to `RM_AddPostNotificationJob`. * write actions, please refer to `RM_AddPostNotificationJob`.
* *
* See https://redis.io/topics/notifications for more information. * See https://redis.io/docs/latest/develop/use/keyspace-notifications/ for more information.
*/ */
int RM_SubscribeToKeyspaceEvents(RedisModuleCtx *ctx, int types, RedisModuleNotificationFunc callback) { int RM_SubscribeToKeyspaceEvents(RedisModuleCtx *ctx, int types, RedisModuleNotificationFunc callback) {
RedisModuleKeyspaceSubscriber *sub = zmalloc(sizeof(*sub)); RedisModuleKeyspaceSubscriber *sub = zmalloc(sizeof(*sub));
@ -9678,6 +9725,12 @@ RedisModuleString *RM_GetModuleUserACLString(RedisModuleUser *user) {
* The returned string must be released with RedisModule_FreeString() or by * The returned string must be released with RedisModule_FreeString() or by
* enabling automatic memory management. */ * enabling automatic memory management. */
RedisModuleString *RM_GetCurrentUserName(RedisModuleCtx *ctx) { RedisModuleString *RM_GetCurrentUserName(RedisModuleCtx *ctx) {
/* Sometimes, the user isn't passed along the call stack or isn't
* even set, so we need to check for the members to avoid crashes. */
if (ctx->client == NULL || ctx->client->user == NULL || ctx->client->user->name == NULL) {
return NULL;
}
return RM_CreateString(ctx,ctx->client->user->name,sdslen(ctx->client->user->name)); return RM_CreateString(ctx,ctx->client->user->name,sdslen(ctx->client->user->name));
} }
@ -9766,6 +9819,45 @@ int RM_ACLCheckKeyPermissions(RedisModuleUser *user, RedisModuleString *key, int
return REDISMODULE_OK; return REDISMODULE_OK;
} }
/* Check if the user can access keys matching the given key prefix according to the ACLs
* attached to the user and the flags representing key access. The flags are the same that
* are used in the keyspec for logical operations. These flags are documented in
* RedisModule_SetCommandInfo as the REDISMODULE_CMD_KEY_ACCESS,
* REDISMODULE_CMD_KEY_UPDATE, REDISMODULE_CMD_KEY_INSERT, and REDISMODULE_CMD_KEY_DELETE flags.
*
* If no flags are supplied, the user is still required to have some access to keys matching
* the prefix for this command to return successfully.
*
* If the user is able to access keys matching the prefix, then REDISMODULE_OK is returned.
* Otherwise, REDISMODULE_ERR is returned and errno is set to one of the following values:
*
* * EINVAL: The provided flags are invalid.
* * EACCES: The user does not have permission to access keys matching the prefix.
*/
int RM_ACLCheckKeyPrefixPermissions(RedisModuleUser *user, RedisModuleString *prefix, int flags) {
const int allow_mask = (REDISMODULE_CMD_KEY_ACCESS
| REDISMODULE_CMD_KEY_INSERT
| REDISMODULE_CMD_KEY_DELETE
| REDISMODULE_CMD_KEY_UPDATE);
if ((flags & allow_mask) != flags) {
errno = EINVAL;
return REDISMODULE_ERR;
}
int keyspec_flags = moduleConvertKeySpecsFlags(flags, 0);
/* Add the prefix flag to the keyspec flags */
keyspec_flags |= CMD_KEY_PREFIX;
if (ACLUserCheckKeyPerm(user->user, prefix->ptr, sdslen(prefix->ptr), keyspec_flags) != ACL_OK) {
errno = EACCES;
return REDISMODULE_ERR;
}
return REDISMODULE_OK;
}
/* Check if the pubsub channel can be accessed by the user based off of the given /* Check if the pubsub channel can be accessed by the user based off of the given
* access flags. See RM_ChannelAtPosWithFlags for more information about the * access flags. See RM_ChannelAtPosWithFlags for more information about the
* possible flags that can be passed in. * possible flags that can be passed in.
@ -10465,7 +10557,7 @@ RedisModuleServerInfoData *RM_GetServerInfo(RedisModuleCtx *ctx, const char *sec
* context instead of passing NULL. */ * context instead of passing NULL. */
void RM_FreeServerInfo(RedisModuleCtx *ctx, RedisModuleServerInfoData *data) { void RM_FreeServerInfo(RedisModuleCtx *ctx, RedisModuleServerInfoData *data) {
if (ctx != NULL) autoMemoryFreed(ctx,REDISMODULE_AM_INFO,data); if (ctx != NULL) autoMemoryFreed(ctx,REDISMODULE_AM_INFO,data);
raxFreeWithCallback(data->rax, (void(*)(void*))sdsfree); raxFreeWithCallback(data->rax, sdsfreegeneric);
zfree(data); zfree(data);
} }
@ -10816,6 +10908,10 @@ void moduleCallCommandFilters(client *c) {
f->callback(&filter); f->callback(&filter);
} }
/* If the filter sets a new command, including command or subcommand,
* the command looked up in IO threads will be invalid. */
c->iolookedcmd = NULL;
c->argv = filter.argv; c->argv = filter.argv;
c->argv_len = filter.argv_len; c->argv_len = filter.argv_len;
c->argc = filter.argc; c->argc = filter.argc;
@ -12092,10 +12188,9 @@ void moduleRemoveConfigs(RedisModule *module) {
listRewind(module->module_configs, &li); listRewind(module->module_configs, &li);
while ((ln = listNext(&li))) { while ((ln = listNext(&li))) {
ModuleConfig *config = listNodeValue(ln); ModuleConfig *config = listNodeValue(ln);
sds module_name = sdsnew(module->name); removeConfig(config->name);
sds full_name = sdscat(sdscat(module_name, "."), config->name); /* ModuleName.ModuleConfig */ if (config->alias)
removeConfig(full_name); removeConfig(config->alias);
sdsfree(full_name);
} }
} }
@ -12134,6 +12229,12 @@ void moduleLoadFromQueue(void) {
listDelNode(server.loadmodule_queue, ln); listDelNode(server.loadmodule_queue, ln);
} }
if (dictSize(server.module_configs_queue)) { if (dictSize(server.module_configs_queue)) {
serverLog(LL_WARNING, "Unresolved Configuration(s) Detected:");
dictIterator *di = dictGetIterator(server.module_configs_queue);
dictEntry *de;
while ((de = dictNext(di)) != NULL) {
serverLog(LL_WARNING, ">>> '%s %s'", (char *)dictGetKey(de), (char *)dictGetVal(de));
}
serverLog(LL_WARNING, "Module Configuration detected without loadmodule directive or no ApplyConfig call: aborting"); serverLog(LL_WARNING, "Module Configuration detected without loadmodule directive or no ApplyConfig call: aborting");
exit(1); exit(1);
} }
@ -12227,6 +12328,7 @@ int moduleFreeCommand(struct RedisModule *module, struct redisCommand *cmd) {
} }
void moduleUnregisterCommands(struct RedisModule *module) { void moduleUnregisterCommands(struct RedisModule *module) {
pauseAllIOThreads();
/* Unregister all the commands registered by this module. */ /* Unregister all the commands registered by this module. */
dictIterator *di = dictGetSafeIterator(server.commands); dictIterator *di = dictGetSafeIterator(server.commands);
dictEntry *de; dictEntry *de;
@ -12241,6 +12343,7 @@ void moduleUnregisterCommands(struct RedisModule *module) {
zfree(cmd); zfree(cmd);
} }
dictReleaseIterator(di); dictReleaseIterator(di);
resumeAllIOThreads();
} }
/* We parse argv to add sds "NAME VALUE" pairs to the server.module_configs_queue list of configs. /* We parse argv to add sds "NAME VALUE" pairs to the server.module_configs_queue list of configs.
@ -12373,7 +12476,7 @@ int moduleLoad(const char *path, void **module_argv, int module_argc, int is_loa
} }
if (post_load_err) { if (post_load_err) {
moduleUnload(ctx.module->name, NULL); serverAssert(moduleUnload(ctx.module->name, NULL, 1) == C_OK);
moduleFreeContext(&ctx); moduleFreeContext(&ctx);
return C_ERR; return C_ERR;
} }
@ -12389,14 +12492,17 @@ int moduleLoad(const char *path, void **module_argv, int module_argc, int is_loa
/* Unload the module registered with the specified name. On success /* Unload the module registered with the specified name. On success
* C_OK is returned, otherwise C_ERR is returned and errmsg is set * C_OK is returned, otherwise C_ERR is returned and errmsg is set
* with an appropriate message. */ * with an appropriate message.
int moduleUnload(sds name, const char **errmsg) { * Only forcefully unload this module, passing forced_unload != 0,
* if it is certain that it has not yet been in use (e.g., immediate
* unload on failed load). */
int moduleUnload(sds name, const char **errmsg, int forced_unload) {
struct RedisModule *module = dictFetchValue(modules,name); struct RedisModule *module = dictFetchValue(modules,name);
if (module == NULL) { if (module == NULL) {
*errmsg = "no such module with that name"; *errmsg = "no such module with that name";
return C_ERR; return C_ERR;
} else if (listLength(module->types)) { } else if (listLength(module->types) && !forced_unload) {
*errmsg = "the module exports one or more module-side data " *errmsg = "the module exports one or more module-side data "
"types, can't unload"; "types, can't unload";
return C_ERR; return C_ERR;
@ -12581,7 +12687,8 @@ int moduleVerifyConfigFlags(unsigned int flags, configType type) {
| REDISMODULE_CONFIG_PROTECTED | REDISMODULE_CONFIG_PROTECTED
| REDISMODULE_CONFIG_DENY_LOADING | REDISMODULE_CONFIG_DENY_LOADING
| REDISMODULE_CONFIG_BITFLAGS | REDISMODULE_CONFIG_BITFLAGS
| REDISMODULE_CONFIG_MEMORY))) { | REDISMODULE_CONFIG_MEMORY
| REDISMODULE_CONFIG_UNPREFIXED))) {
serverLogRaw(LL_WARNING, "Invalid flag(s) for configuration"); serverLogRaw(LL_WARNING, "Invalid flag(s) for configuration");
return REDISMODULE_ERR; return REDISMODULE_ERR;
} }
@ -12618,6 +12725,54 @@ int moduleVerifyResourceName(const char *name) {
return REDISMODULE_OK; return REDISMODULE_OK;
} }
/* Verify unprefixed name config might be a single "<name>" or in the form
* "<name>|<alias>". Unlike moduleVerifyResourceName(), unprefixed name config
* allows a single dot in the name or alias.
*
* delim - Updates to point to "|" if it exists, NULL otherwise.
*/
int moduleVerifyUnprefixedName(const char *nameAlias, const char **delim) {
if (nameAlias[0] == '\0')
return REDISMODULE_ERR;
*delim = NULL;
int dot_count = 0, lname = 0;
for (size_t i = 0; nameAlias[i] != '\0'; i++) {
char ch = nameAlias[i];
if (((*delim) == NULL) && (ch == '|')) {
/* Handle single separator between name and alias */
if (!lname) {
serverLog(LL_WARNING, "Module configuration name is empty: %s", nameAlias);
return REDISMODULE_ERR;
}
*delim = &nameAlias[i];
dot_count = lname = 0;
} else if ( (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
(ch >= '0' && ch <= '9') || (ch == '_') || (ch == '-') )
{
++lname;
} else if (ch == '.') {
/* Allow only one dot per section (name or alias) */
if (++dot_count > 1) {
serverLog(LL_WARNING, "Invalid character sequence in Module configuration name or alias: %s", nameAlias);
return REDISMODULE_ERR;
}
} else {
serverLog(LL_WARNING, "Invalid character %c in Module configuration name or alias %s.", ch, nameAlias);
return REDISMODULE_ERR;
}
}
if (!lname) {
serverLog(LL_WARNING, "Module configuration name or alias is empty : %s", nameAlias);
return REDISMODULE_ERR;
}
return REDISMODULE_OK;
}
/* This is a series of set functions for each type that act as dispatchers for /* This is a series of set functions for each type that act as dispatchers for
* config.c to call module set callbacks. */ * config.c to call module set callbacks. */
#define CONFIG_ERR_SIZE 256 #define CONFIG_ERR_SIZE 256
@ -12630,9 +12785,24 @@ static void propagateErrorString(RedisModuleString *err_in, const char **err) {
} }
} }
/* If configuration was originally registered with indication to prefix the name,
* return the name without the prefix by skipping prefix "<MODULE-NAME>.".
* Otherwise, return the stored name as is. */
static char *getRegisteredConfigName(ModuleConfig *config) {
if (config->unprefixedFlag)
return config->name;
/* For prefixed configuration, find the '.' indicating the end of the prefix */
char *endOfPrefix = strchr(config->name, '.');
serverAssert(endOfPrefix != NULL);
return endOfPrefix + 1;
}
int setModuleBoolConfig(ModuleConfig *config, int val, const char **err) { int setModuleBoolConfig(ModuleConfig *config, int val, const char **err) {
RedisModuleString *error = NULL; RedisModuleString *error = NULL;
int return_code = config->set_fn.set_bool(config->name, val, config->privdata, &error);
char *rname = getRegisteredConfigName(config);
int return_code = config->set_fn.set_bool(rname, val, config->privdata, &error);
propagateErrorString(error, err); propagateErrorString(error, err);
return return_code == REDISMODULE_OK ? 1 : 0; return return_code == REDISMODULE_OK ? 1 : 0;
} }
@ -12640,7 +12810,9 @@ int setModuleBoolConfig(ModuleConfig *config, int val, const char **err) {
int setModuleStringConfig(ModuleConfig *config, sds strval, const char **err) { int setModuleStringConfig(ModuleConfig *config, sds strval, const char **err) {
RedisModuleString *error = NULL; RedisModuleString *error = NULL;
RedisModuleString *new = createStringObject(strval, sdslen(strval)); RedisModuleString *new = createStringObject(strval, sdslen(strval));
int return_code = config->set_fn.set_string(config->name, new, config->privdata, &error);
char *rname = getRegisteredConfigName(config);
int return_code = config->set_fn.set_string(rname, new, config->privdata, &error);
propagateErrorString(error, err); propagateErrorString(error, err);
decrRefCount(new); decrRefCount(new);
return return_code == REDISMODULE_OK ? 1 : 0; return return_code == REDISMODULE_OK ? 1 : 0;
@ -12655,7 +12827,8 @@ int setModuleEnumConfig(ModuleConfig *config, int val, const char **err) {
int setModuleNumericConfig(ModuleConfig *config, long long val, const char **err) { int setModuleNumericConfig(ModuleConfig *config, long long val, const char **err) {
RedisModuleString *error = NULL; RedisModuleString *error = NULL;
int return_code = config->set_fn.set_numeric(config->name, val, config->privdata, &error); char *rname = getRegisteredConfigName(config);
int return_code = config->set_fn.set_numeric(rname, val, config->privdata, &error);
propagateErrorString(error, err); propagateErrorString(error, err);
return return_code == REDISMODULE_OK ? 1 : 0; return return_code == REDISMODULE_OK ? 1 : 0;
} }
@ -12663,20 +12836,24 @@ int setModuleNumericConfig(ModuleConfig *config, long long val, const char **err
/* This is a series of get functions for each type that act as dispatchers for /* This is a series of get functions for each type that act as dispatchers for
* config.c to call module set callbacks. */ * config.c to call module set callbacks. */
int getModuleBoolConfig(ModuleConfig *module_config) { int getModuleBoolConfig(ModuleConfig *module_config) {
return module_config->get_fn.get_bool(module_config->name, module_config->privdata); char *rname = getRegisteredConfigName(module_config);
return module_config->get_fn.get_bool(rname, module_config->privdata);
} }
sds getModuleStringConfig(ModuleConfig *module_config) { sds getModuleStringConfig(ModuleConfig *module_config) {
RedisModuleString *val = module_config->get_fn.get_string(module_config->name, module_config->privdata); char *rname = getRegisteredConfigName(module_config);
RedisModuleString *val = module_config->get_fn.get_string(rname, module_config->privdata);
return val ? sdsdup(val->ptr) : NULL; return val ? sdsdup(val->ptr) : NULL;
} }
int getModuleEnumConfig(ModuleConfig *module_config) { int getModuleEnumConfig(ModuleConfig *module_config) {
return module_config->get_fn.get_enum(module_config->name, module_config->privdata); char *rname = getRegisteredConfigName(module_config);
return module_config->get_fn.get_enum(rname, module_config->privdata);
} }
long long getModuleNumericConfig(ModuleConfig *module_config) { long long getModuleNumericConfig(ModuleConfig *module_config) {
return module_config->get_fn.get_numeric(module_config->name, module_config->privdata); char *rname = getRegisteredConfigName(module_config);
return module_config->get_fn.get_numeric(rname, module_config->privdata);
} }
/* This function takes a module and a list of configs stored as sds NAME VALUE pairs. /* This function takes a module and a list of configs stored as sds NAME VALUE pairs.
@ -12688,25 +12865,26 @@ int loadModuleConfigs(RedisModule *module) {
listRewind(module->module_configs, &li); listRewind(module->module_configs, &li);
while ((ln = listNext(&li))) { while ((ln = listNext(&li))) {
ModuleConfig *module_config = listNodeValue(ln); ModuleConfig *module_config = listNodeValue(ln);
sds config_name = sdscatfmt(sdsempty(), "%s.%s", module->name, module_config->name); dictEntry *de = dictUnlink(server.module_configs_queue, module_config->name);
dictEntry *config_argument = dictFind(server.module_configs_queue, config_name); if ((!de) && (module_config->alias))
if (config_argument) { de = dictUnlink(server.module_configs_queue, module_config->alias);
if (!performModuleConfigSetFromName(dictGetKey(config_argument), dictGetVal(config_argument), &err)) {
serverLog(LL_WARNING, "Issue during loading of configuration %s : %s", (sds) dictGetKey(config_argument), err); /* If found in the queue, set the value. Otherwise, set the default value. */
sdsfree(config_name); if (de) {
if (!performModuleConfigSetFromName(dictGetKey(de), dictGetVal(de), &err)) {
serverLog(LL_WARNING, "Issue during loading of configuration %s : %s", (sds) dictGetKey(de), err);
dictFreeUnlinkedEntry(server.module_configs_queue, de);
dictEmpty(server.module_configs_queue, NULL); dictEmpty(server.module_configs_queue, NULL);
return REDISMODULE_ERR; return REDISMODULE_ERR;
} }
dictFreeUnlinkedEntry(server.module_configs_queue, de);
} else { } else {
if (!performModuleConfigSetDefaultFromName(config_name, &err)) { if (!performModuleConfigSetDefaultFromName(module_config->name, &err)) {
serverLog(LL_WARNING, "Issue attempting to set default value of configuration %s : %s", module_config->name, err); serverLog(LL_WARNING, "Issue attempting to set default value of configuration %s : %s", module_config->name, err);
sdsfree(config_name);
dictEmpty(server.module_configs_queue, NULL); dictEmpty(server.module_configs_queue, NULL);
return REDISMODULE_ERR; return REDISMODULE_ERR;
} }
} }
dictDelete(server.module_configs_queue, config_name);
sdsfree(config_name);
} }
module->configs_initialized = 1; module->configs_initialized = 1;
return REDISMODULE_OK; return REDISMODULE_OK;
@ -12756,26 +12934,93 @@ int moduleConfigApplyConfig(list *module_configs, const char **err, const char *
* ## Module Configurations API * ## Module Configurations API
* -------------------------------------------------------------------------- */ * -------------------------------------------------------------------------- */
/* Create a module config object. */ /* Resolve config name and create a module config object */
ModuleConfig *createModuleConfig(const char *name, RedisModuleConfigApplyFunc apply_fn, void *privdata, RedisModule *module) { ModuleConfig *createModuleConfig(const char *name, RedisModuleConfigApplyFunc apply_fn,
void *privdata, RedisModule *module, unsigned int flags)
{
sds cname, alias = NULL;
/* Determine the configuration name:
* - If the unprefixed flag is set, the "<MODULE-NAME>." prefix is omitted.
* - An optional alias can be specified using "<NAME>|<ALIAS>".
*
* Examples:
* - Unprefixed: "bf.initial_size" or "bf-initial-size|bf.initial_size".
* - Prefixed: "initial_size" becomes "<MODULE-NAME>.initial_size".
*/
if (flags & REDISMODULE_CONFIG_UNPREFIXED) {
const char *delim = strchr(name, '|');
cname = sdsnew(name);
if (delim) { /* Handle "<NAME>|<ALIAS>" format */
sdssubstr(cname, 0, delim - name);
alias = sdsnew(delim + 1);
}
} else {
/* Add the module name prefix */
cname = sdscatfmt(sdsempty(), "%s.%s", module->name, name);
}
ModuleConfig *new_config = zmalloc(sizeof(ModuleConfig)); ModuleConfig *new_config = zmalloc(sizeof(ModuleConfig));
new_config->name = sdsnew(name); new_config->unprefixedFlag = flags & REDISMODULE_CONFIG_UNPREFIXED;
new_config->name = cname;
new_config->alias = alias;
new_config->apply_fn = apply_fn; new_config->apply_fn = apply_fn;
new_config->privdata = privdata; new_config->privdata = privdata;
new_config->module = module; new_config->module = module;
return new_config; return new_config;
} }
/* Verify the configuration name and check for duplicates.
*
* - If the configuration is flagged as unprefixed, it checks for duplicate
* names and optional aliases in the format <NAME>|<ALIAS>.
* - If the configuration is prefixed, it ensures the name is unique with
* the module name prepended (<MODULE_NAME>.<NAME>).
*/
int moduleConfigValidityCheck(RedisModule *module, const char *name, unsigned int flags, configType type) { int moduleConfigValidityCheck(RedisModule *module, const char *name, unsigned int flags, configType type) {
if (!module->onload) { if (!module->onload) {
errno = EBUSY; errno = EBUSY;
return REDISMODULE_ERR; return REDISMODULE_ERR;
} }
if (moduleVerifyConfigFlags(flags, type) || moduleVerifyResourceName(name)) { if (moduleVerifyConfigFlags(flags, type)) {
errno = EINVAL; errno = EINVAL;
return REDISMODULE_ERR; return REDISMODULE_ERR;
} }
if (isModuleConfigNameRegistered(module, name)) {
int isdup = 0;
if (flags & REDISMODULE_CONFIG_UNPREFIXED) {
const char *delim = NULL; /* Pointer to the '|' delimiter in <NAME>|<ALIAS> */
if (moduleVerifyUnprefixedName(name, &delim)){
errno = EINVAL;
return REDISMODULE_ERR;
}
if (delim) {
/* Temporary split the "<NAME>|<ALIAS>" for the check */
int count;
sds *ar = sdssplitlen(name, strlen(name), "|", 1, &count);
serverAssert(count == 2); /* Already validated */
isdup = configExists(ar[0]) ||
configExists(ar[1]) ||
(sdscmp(ar[0], ar[1]) == 0);
sdsfreesplitres(ar, count);
} else {
sds _name = sdsnew(name);
isdup = configExists(_name);
sdsfree(_name);
}
} else {
if (moduleVerifyResourceName(name)) {
errno = EINVAL;
return REDISMODULE_ERR;
}
sds fullname = sdscatfmt(sdsempty(), "%s.%s", module->name, name);
isdup = configExists(fullname);
sdsfree(fullname);
}
if (isdup) {
serverLog(LL_WARNING, "Configuration by the name: %s already registered", name); serverLog(LL_WARNING, "Configuration by the name: %s already registered", name);
errno = EALREADY; errno = EALREADY;
return REDISMODULE_ERR; return REDISMODULE_ERR;
@ -12885,12 +13130,14 @@ int RM_RegisterStringConfig(RedisModuleCtx *ctx, const char *name, const char *d
if (moduleConfigValidityCheck(module, name, flags, NUMERIC_CONFIG)) { if (moduleConfigValidityCheck(module, name, flags, NUMERIC_CONFIG)) {
return REDISMODULE_ERR; return REDISMODULE_ERR;
} }
ModuleConfig *new_config = createModuleConfig(name, applyfn, privdata, module);
new_config->get_fn.get_string = getfn; ModuleConfig *mc = createModuleConfig(name, applyfn, privdata, module, flags);
new_config->set_fn.set_string = setfn; mc->get_fn.get_string = getfn;
listAddNodeTail(module->module_configs, new_config); mc->set_fn.set_string = setfn;
flags = maskModuleConfigFlags(flags); listAddNodeTail(module->module_configs, mc);
addModuleStringConfig(module->name, name, flags, new_config, default_val ? sdsnew(default_val) : NULL); unsigned int cflags = maskModuleConfigFlags(flags);
addModuleStringConfig(sdsdup(mc->name), (mc->alias) ? sdsdup(mc->alias) : NULL,
cflags, mc, default_val ? sdsnew(default_val) : NULL);
return REDISMODULE_OK; return REDISMODULE_OK;
} }
@ -12902,12 +13149,13 @@ int RM_RegisterBoolConfig(RedisModuleCtx *ctx, const char *name, int default_val
if (moduleConfigValidityCheck(module, name, flags, BOOL_CONFIG)) { if (moduleConfigValidityCheck(module, name, flags, BOOL_CONFIG)) {
return REDISMODULE_ERR; return REDISMODULE_ERR;
} }
ModuleConfig *new_config = createModuleConfig(name, applyfn, privdata, module); ModuleConfig *mc = createModuleConfig(name, applyfn, privdata, module, flags);
new_config->get_fn.get_bool = getfn; mc->get_fn.get_bool = getfn;
new_config->set_fn.set_bool = setfn; mc->set_fn.set_bool = setfn;
listAddNodeTail(module->module_configs, new_config); listAddNodeTail(module->module_configs, mc);
flags = maskModuleConfigFlags(flags); unsigned int cflags = maskModuleConfigFlags(flags);
addModuleBoolConfig(module->name, name, flags, new_config, default_val); addModuleBoolConfig(sdsdup(mc->name), (mc->alias) ? sdsdup(mc->alias) : NULL,
cflags, mc, default_val);
return REDISMODULE_OK; return REDISMODULE_OK;
} }
@ -12945,9 +13193,9 @@ int RM_RegisterEnumConfig(RedisModuleCtx *ctx, const char *name, int default_val
if (moduleConfigValidityCheck(module, name, flags, ENUM_CONFIG)) { if (moduleConfigValidityCheck(module, name, flags, ENUM_CONFIG)) {
return REDISMODULE_ERR; return REDISMODULE_ERR;
} }
ModuleConfig *new_config = createModuleConfig(name, applyfn, privdata, module); ModuleConfig *mc = createModuleConfig(name, applyfn, privdata, module, flags);
new_config->get_fn.get_enum = getfn; mc->get_fn.get_enum = getfn;
new_config->set_fn.set_enum = setfn; mc->set_fn.set_enum = setfn;
configEnum *enum_vals = zmalloc((num_enum_vals + 1) * sizeof(configEnum)); configEnum *enum_vals = zmalloc((num_enum_vals + 1) * sizeof(configEnum));
for (int i = 0; i < num_enum_vals; i++) { for (int i = 0; i < num_enum_vals; i++) {
enum_vals[i].name = zstrdup(enum_values[i]); enum_vals[i].name = zstrdup(enum_values[i]);
@ -12955,9 +13203,11 @@ int RM_RegisterEnumConfig(RedisModuleCtx *ctx, const char *name, int default_val
} }
enum_vals[num_enum_vals].name = NULL; enum_vals[num_enum_vals].name = NULL;
enum_vals[num_enum_vals].val = 0; enum_vals[num_enum_vals].val = 0;
listAddNodeTail(module->module_configs, new_config); listAddNodeTail(module->module_configs, mc);
flags = maskModuleConfigFlags(flags) | maskModuleEnumConfigFlags(flags);
addModuleEnumConfig(module->name, name, flags, new_config, default_val, enum_vals); unsigned int cflags = maskModuleConfigFlags(flags) | maskModuleEnumConfigFlags(flags);
addModuleEnumConfig(sdsdup(mc->name), (mc->alias) ? sdsdup(mc->alias) : NULL,
cflags, mc, default_val, enum_vals, num_enum_vals);
return REDISMODULE_OK; return REDISMODULE_OK;
} }
@ -12970,13 +13220,15 @@ int RM_RegisterNumericConfig(RedisModuleCtx *ctx, const char *name, long long de
if (moduleConfigValidityCheck(module, name, flags, NUMERIC_CONFIG)) { if (moduleConfigValidityCheck(module, name, flags, NUMERIC_CONFIG)) {
return REDISMODULE_ERR; return REDISMODULE_ERR;
} }
ModuleConfig *new_config = createModuleConfig(name, applyfn, privdata, module); ModuleConfig *mc = createModuleConfig(name, applyfn, privdata, module, flags);
new_config->get_fn.get_numeric = getfn; mc->get_fn.get_numeric = getfn;
new_config->set_fn.set_numeric = setfn; mc->set_fn.set_numeric = setfn;
listAddNodeTail(module->module_configs, new_config); listAddNodeTail(module->module_configs, mc);
unsigned int numeric_flags = maskModuleNumericConfigFlags(flags); unsigned int numeric_flags = maskModuleNumericConfigFlags(flags);
flags = maskModuleConfigFlags(flags);
addModuleNumericConfig(module->name, name, flags, new_config, default_val, numeric_flags, min, max); unsigned int cflags = maskModuleConfigFlags(flags);
addModuleNumericConfig(sdsdup(mc->name), (mc->alias) ? sdsdup(mc->alias) : NULL,
cflags, mc, default_val, numeric_flags, min, max);
return REDISMODULE_OK; return REDISMODULE_OK;
} }
@ -13184,7 +13436,7 @@ NULL
} else if (!strcasecmp(subcmd,"unload") && c->argc == 3) { } else if (!strcasecmp(subcmd,"unload") && c->argc == 3) {
const char *errmsg = NULL; const char *errmsg = NULL;
if (moduleUnload(c->argv[2]->ptr, &errmsg) == C_OK) if (moduleUnload(c->argv[2]->ptr, &errmsg, 0) == C_OK)
addReply(c,shared.ok); addReply(c,shared.ok);
else { else {
if (errmsg == NULL) errmsg = "operation not possible."; if (errmsg == NULL) errmsg = "operation not possible.";
@ -13826,6 +14078,7 @@ void moduleRegisterCoreAPI(void) {
REGISTER_API(ZsetRangeEndReached); REGISTER_API(ZsetRangeEndReached);
REGISTER_API(HashSet); REGISTER_API(HashSet);
REGISTER_API(HashGet); REGISTER_API(HashGet);
REGISTER_API(HashFieldMinExpire);
REGISTER_API(StreamAdd); REGISTER_API(StreamAdd);
REGISTER_API(StreamDelete); REGISTER_API(StreamDelete);
REGISTER_API(StreamIteratorStart); REGISTER_API(StreamIteratorStart);
@ -14024,6 +14277,7 @@ void moduleRegisterCoreAPI(void) {
REGISTER_API(GetModuleUserFromUserName); REGISTER_API(GetModuleUserFromUserName);
REGISTER_API(ACLCheckCommandPermissions); REGISTER_API(ACLCheckCommandPermissions);
REGISTER_API(ACLCheckKeyPermissions); REGISTER_API(ACLCheckKeyPermissions);
REGISTER_API(ACLCheckKeyPrefixPermissions);
REGISTER_API(ACLCheckChannelPermissions); REGISTER_API(ACLCheckChannelPermissions);
REGISTER_API(ACLAddLogEntry); REGISTER_API(ACLAddLogEntry);
REGISTER_API(ACLAddLogEntryByUserName); REGISTER_API(ACLAddLogEntryByUserName);

View File

@ -355,7 +355,12 @@ int isWatchedKeyExpired(client *c) {
} }
/* "Touch" a key, so that if this key is being WATCHed by some client the /* "Touch" a key, so that if this key is being WATCHed by some client the
* next EXEC will fail. */ * next EXEC will fail.
*
* Sanitizer suppression: IO threads also read c->flags, but never modify
* it or read the CLIENT_DIRTY_CAS bit, main thread just only modifies
* this bit, so there is actually no real data race. */
REDIS_NO_SANITIZE("thread")
void touchWatchedKey(redisDb *db, robj *key) { void touchWatchedKey(redisDb *db, robj *key) {
list *clients; list *clients;
listIter li; listIter li;
@ -404,6 +409,7 @@ void touchWatchedKey(redisDb *db, robj *key) {
* replaced_with: for SWAPDB, the WATCH should be invalidated if * replaced_with: for SWAPDB, the WATCH should be invalidated if
* the key exists in either of them, and skipped only if it * the key exists in either of them, and skipped only if it
* doesn't exist in both. */ * doesn't exist in both. */
REDIS_NO_SANITIZE("thread")
void touchAllWatchedKeysInDb(redisDb *emptied, redisDb *replaced_with) { void touchAllWatchedKeysInDb(redisDb *emptied, redisDb *replaced_with) {
listIter li; listIter li;
listNode *ln; listNode *ln;

File diff suppressed because it is too large Load Diff

View File

@ -9,7 +9,7 @@
#include "server.h" #include "server.h"
/* This file implements keyspace events notification via Pub/Sub and /* This file implements keyspace events notification via Pub/Sub and
* described at https://redis.io/topics/notifications. */ * described at https://redis.io/docs/latest/develop/use/keyspace-notifications/. */
/* Turn a string representing notification classes into an integer /* Turn a string representing notification classes into an integer
* representing notification classes flags xored. * representing notification classes flags xored.

View File

@ -680,6 +680,18 @@ robj *tryObjectEncoding(robj *o) {
return tryObjectEncodingEx(o, 1); return tryObjectEncodingEx(o, 1);
} }
size_t getObjectLength(robj *o) {
switch(o->type) {
case OBJ_STRING: return stringObjectLen(o);
case OBJ_LIST: return listTypeLength(o);
case OBJ_SET: return setTypeSize(o);
case OBJ_ZSET: return zsetLength(o);
case OBJ_HASH: return hashTypeLength(o, 0);
case OBJ_STREAM: return streamLength(o);
default: return 0;
}
}
/* Get a decoded version of an encoded object (returned as a new object). /* Get a decoded version of an encoded object (returned as a new object).
* If the object is already raw-encoded just increment the ref count. */ * If the object is already raw-encoded just increment the ref count. */
robj *getDecodedObject(robj *o) { robj *getDecodedObject(robj *o) {
@ -1218,12 +1230,16 @@ struct redisMemOverhead *getMemoryOverheadData(void) {
mh->aof_buffer = mem; mh->aof_buffer = mem;
mem_total+=mem; mem_total+=mem;
mem = evalScriptsMemory(); mem = evalScriptsMemoryEngine();
mh->lua_caches = mem; mh->eval_caches = mem;
mem_total+=mem; mem_total+=mem;
mh->functions_caches = functionsMemoryOverhead(); mh->functions_caches = functionsMemoryEngine();
mem_total+=mh->functions_caches; mem_total+=mh->functions_caches;
mh->script_vm = evalScriptsMemoryVM();
mh->script_vm += functionsMemoryVM();
mem_total+=mh->script_vm;
for (j = 0; j < server.dbnum; j++) { for (j = 0; j < server.dbnum; j++) {
redisDb *db = server.db+j; redisDb *db = server.db+j;
if (!kvstoreNumAllocatedDicts(db->keys)) continue; if (!kvstoreNumAllocatedDicts(db->keys)) continue;
@ -1544,7 +1560,7 @@ NULL
} else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) { } else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) {
struct redisMemOverhead *mh = getMemoryOverheadData(); struct redisMemOverhead *mh = getMemoryOverheadData();
addReplyMapLen(c,31+mh->num_dbs); addReplyMapLen(c,32+mh->num_dbs);
addReplyBulkCString(c,"peak.allocated"); addReplyBulkCString(c,"peak.allocated");
addReplyLongLong(c,mh->peak_allocated); addReplyLongLong(c,mh->peak_allocated);
@ -1571,11 +1587,14 @@ NULL
addReplyLongLong(c,mh->aof_buffer); addReplyLongLong(c,mh->aof_buffer);
addReplyBulkCString(c,"lua.caches"); addReplyBulkCString(c,"lua.caches");
addReplyLongLong(c,mh->lua_caches); addReplyLongLong(c,mh->eval_caches);
addReplyBulkCString(c,"functions.caches"); addReplyBulkCString(c,"functions.caches");
addReplyLongLong(c,mh->functions_caches); addReplyLongLong(c,mh->functions_caches);
addReplyBulkCString(c,"script.VMs");
addReplyLongLong(c,mh->script_vm);
for (size_t j = 0; j < mh->num_dbs; j++) { for (size_t j = 0; j < mh->num_dbs; j++) {
char dbname[32]; char dbname[32];
snprintf(dbname,sizeof(dbname),"db.%zd",mh->db[j].dbid); snprintf(dbname,sizeof(dbname),"db.%zd",mh->db[j].dbid);

View File

@ -1244,10 +1244,17 @@ int quicklistDelRange(quicklist *quicklist, const long start,
/* compare between a two entries */ /* compare between a two entries */
int quicklistCompare(quicklistEntry* entry, unsigned char *p2, const size_t p2_len) { int quicklistCompare(quicklistEntry* entry, unsigned char *p2, const size_t p2_len) {
if (unlikely(QL_NODE_IS_PLAIN(entry->node))) { if (entry->value) {
return ((entry->sz == p2_len) && (memcmp(entry->value, p2, p2_len) == 0)); return ((entry->sz == p2_len) && (memcmp(entry->value, p2, p2_len) == 0));
} else {
/* We use string2ll() to get an integer representation of the
* string 'p2' and compare it to 'entry->longval', it's much
* faster than convert integer to string and comparing. */
long long sval;
if (string2ll((const char*)p2, p2_len, &sval))
return entry->longval == sval;
} }
return lpCompare(entry->zi, p2, p2_len); return 0;
} }
/* Returns a quicklist iterator 'iter'. After the initialization every /* Returns a quicklist iterator 'iter'. After the initialization every
@ -2119,7 +2126,7 @@ int quicklistTest(int argc, char *argv[], int flags) {
quicklistRelease(ql); quicklistRelease(ql);
} }
TEST("Comprassion Plain node") { TEST("Compression Plain node") {
for (int f = 0; f < fill_count; f++) { for (int f = 0; f < fill_count; f++) {
size_t large_limit = (fills[f] < 0) ? quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1; size_t large_limit = (fills[f] < 0) ? quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1;
@ -3294,7 +3301,7 @@ int quicklistTest(int argc, char *argv[], int flags) {
} }
#if ULONG_MAX >= 0xffffffffffffffff #if ULONG_MAX >= 0xffffffffffffffff
TEST("compress and decomress quicklist plain node large than UINT32_MAX") { TEST("compress and decompress quicklist plain node larger than UINT32_MAX") {
size_t sz = (1ull << 32); size_t sz = (1ull << 32);
unsigned char *s = zmalloc(sz); unsigned char *s = zmalloc(sz);
randstring(s, sz); randstring(s, sz);

138
src/rdb.c
View File

@ -2,8 +2,13 @@
* Copyright (c) 2009-Present, Redis Ltd. * Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved. * All rights reserved.
* *
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0 * Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1). * (RSALv2) or the Server Side Public License v1 (SSPLv1).
*
* Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
*/ */
#include "server.h" #include "server.h"
@ -2332,6 +2337,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error)
rdbReportCorruptRDB("invalid expireAt time: %llu", rdbReportCorruptRDB("invalid expireAt time: %llu",
(unsigned long long) expireAt); (unsigned long long) expireAt);
decrRefCount(o); decrRefCount(o);
if (dupSearchDict != NULL) dictRelease(dupSearchDict);
return NULL; return NULL;
} }
@ -3809,8 +3815,10 @@ static void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
} }
if (server.rdb_child_exit_pipe!=-1) if (server.rdb_child_exit_pipe!=-1)
close(server.rdb_child_exit_pipe); close(server.rdb_child_exit_pipe);
aeDeleteFileEvent(server.el, server.rdb_pipe_read, AE_READABLE); if (server.rdb_pipe_read != -1) {
close(server.rdb_pipe_read); aeDeleteFileEvent(server.el, server.rdb_pipe_read, AE_READABLE);
close(server.rdb_pipe_read);
}
server.rdb_child_exit_pipe = -1; server.rdb_child_exit_pipe = -1;
server.rdb_pipe_read = -1; server.rdb_pipe_read = -1;
zfree(server.rdb_pipe_conns); zfree(server.rdb_pipe_conns);
@ -3874,7 +3882,8 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
listNode *ln; listNode *ln;
listIter li; listIter li;
pid_t childpid; pid_t childpid;
int pipefds[2], rdb_pipe_write, safe_to_exit_pipe; int pipefds[2], rdb_pipe_write = 0, safe_to_exit_pipe = 0;
int rdb_channel = (req & SLAVE_REQ_RDB_CHANNEL);
if (hasActiveChildProcess()) return C_ERR; if (hasActiveChildProcess()) return C_ERR;
@ -3882,29 +3891,30 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
* drained the pipe. */ * drained the pipe. */
if (server.rdb_pipe_conns) return C_ERR; if (server.rdb_pipe_conns) return C_ERR;
/* Before to fork, create a pipe that is used to transfer the rdb bytes to if (!rdb_channel) {
* the parent, we can't let it write directly to the sockets, since in case /* Before to fork, create a pipe that is used to transfer the rdb bytes to
* of TLS we must let the parent handle a continuous TLS state when the * the parent, we can't let it write directly to the sockets, since in case
* child terminates and parent takes over. */ * of TLS we must let the parent handle a continuous TLS state when the
if (anetPipe(pipefds, O_NONBLOCK, 0) == -1) return C_ERR; * child terminates and parent takes over. */
server.rdb_pipe_read = pipefds[0]; /* read end */ if (anetPipe(pipefds, O_NONBLOCK, 0) == -1) return C_ERR;
rdb_pipe_write = pipefds[1]; /* write end */ server.rdb_pipe_read = pipefds[0]; /* read end */
rdb_pipe_write = pipefds[1]; /* write end */
/* create another pipe that is used by the parent to signal to the child /* create another pipe that is used by the parent to signal to the child
* that it can exit. */ * that it can exit. */
if (anetPipe(pipefds, 0, 0) == -1) { if (anetPipe(pipefds, 0, 0) == -1) {
close(rdb_pipe_write); close(rdb_pipe_write);
close(server.rdb_pipe_read); close(server.rdb_pipe_read);
return C_ERR; return C_ERR;
}
safe_to_exit_pipe = pipefds[0]; /* read end */
server.rdb_child_exit_pipe = pipefds[1]; /* write end */
} }
safe_to_exit_pipe = pipefds[0]; /* read end */
server.rdb_child_exit_pipe = pipefds[1]; /* write end */
/* Collect the connections of the replicas we want to transfer /* Collect the connections of the replicas we want to transfer
* the RDB to, which are i WAIT_BGSAVE_START state. */ * the RDB to, which are in WAIT_BGSAVE_START state. */
server.rdb_pipe_conns = zmalloc(sizeof(connection *)*listLength(server.slaves)); int numconns = 0;
server.rdb_pipe_numconns = 0; connection **conns = zmalloc(sizeof(*conns) * listLength(server.slaves));
server.rdb_pipe_numconns_writing = 0;
listRewind(server.slaves,&li); listRewind(server.slaves,&li);
while((ln = listNext(&li))) { while((ln = listNext(&li))) {
client *slave = ln->value; client *slave = ln->value;
@ -3912,22 +3922,36 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
/* Check slave has the exact requirements */ /* Check slave has the exact requirements */
if (slave->slave_req != req) if (slave->slave_req != req)
continue; continue;
server.rdb_pipe_conns[server.rdb_pipe_numconns++] = slave->conn; replicationSetupSlaveForFullResync(slave, getPsyncInitialOffset());
replicationSetupSlaveForFullResync(slave,getPsyncInitialOffset()); conns[numconns++] = slave->conn;
if (rdb_channel) {
/* Put the socket in blocking mode to simplify RDB transfer. */
connSendTimeout(slave->conn, server.repl_timeout * 1000);
connBlock(slave->conn);
}
} }
} }
if (!rdb_channel) {
server.rdb_pipe_conns = conns;
server.rdb_pipe_numconns = numconns;
server.rdb_pipe_numconns_writing = 0;
}
/* Create the child process. */ /* Create the child process. */
if ((childpid = redisFork(CHILD_TYPE_RDB)) == 0) { if ((childpid = redisFork(CHILD_TYPE_RDB)) == 0) {
/* Child */ /* Child */
int retval, dummy; int retval, dummy;
rio rdb; rio rdb;
rioInitWithFd(&rdb,rdb_pipe_write); if (rdb_channel) {
rioInitWithConnset(&rdb, conns, numconns);
/* Close the reading part, so that if the parent crashes, the child will } else {
* get a write error and exit. */ rioInitWithFd(&rdb,rdb_pipe_write);
close(server.rdb_pipe_read); /* Close the reading part, so that if the parent crashes, the child
* will get a write error and exit. */
close(server.rdb_pipe_read);
}
redisSetProcTitle("redis-rdb-to-slaves"); redisSetProcTitle("redis-rdb-to-slaves");
redisSetCpuAffinity(server.bgsave_cpulist); redisSetCpuAffinity(server.bgsave_cpulist);
@ -3940,14 +3964,19 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
sendChildCowInfo(CHILD_INFO_TYPE_RDB_COW_SIZE, "RDB"); sendChildCowInfo(CHILD_INFO_TYPE_RDB_COW_SIZE, "RDB");
} }
rioFreeFd(&rdb); if (rdb_channel) {
/* wake up the reader, tell it we're done. */ rioFreeConnset(&rdb);
close(rdb_pipe_write); } else {
close(server.rdb_child_exit_pipe); /* close write end so that we can detect the close on the parent. */ rioFreeFd(&rdb);
/* hold exit until the parent tells us it's safe. we're not expecting /* wake up the reader, tell it we're done. */
* to read anything, just get the error when the pipe is closed. */ close(rdb_pipe_write);
dummy = read(safe_to_exit_pipe, pipefds, 1); close(server.rdb_child_exit_pipe); /* close write end so that we can detect the close on the parent. */
UNUSED(dummy); /* hold exit until the parent tells us it's safe. we're not expecting
* to read anything, just get the error when the pipe is closed. */
dummy = read(safe_to_exit_pipe, pipefds, 1);
UNUSED(dummy);
}
zfree(conns);
exitFromChild((retval == C_OK) ? 0 : 1); exitFromChild((retval == C_OK) ? 0 : 1);
} else { } else {
/* Parent */ /* Parent */
@ -3965,24 +3994,33 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
slave->replstate = SLAVE_STATE_WAIT_BGSAVE_START; slave->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
} }
} }
close(rdb_pipe_write);
close(server.rdb_pipe_read); if (!rdb_channel) {
close(server.rdb_child_exit_pipe); close(rdb_pipe_write);
zfree(server.rdb_pipe_conns); close(server.rdb_pipe_read);
server.rdb_pipe_conns = NULL; close(server.rdb_child_exit_pipe);
server.rdb_pipe_numconns = 0; zfree(server.rdb_pipe_conns);
server.rdb_pipe_numconns_writing = 0; server.rdb_pipe_conns = NULL;
server.rdb_pipe_numconns = 0;
server.rdb_pipe_numconns_writing = 0;
}
} else { } else {
serverLog(LL_NOTICE,"Background RDB transfer started by pid %ld", serverLog(LL_NOTICE, "Background RDB transfer started by pid %ld to %s", (long)childpid,
(long) childpid); rdb_channel ? "replica socket" : "parent process pipe");
server.rdb_save_time_start = time(NULL); server.rdb_save_time_start = time(NULL);
server.rdb_child_type = RDB_CHILD_TYPE_SOCKET; server.rdb_child_type = RDB_CHILD_TYPE_SOCKET;
close(rdb_pipe_write); /* close write in parent so that it can detect the close on the child. */ if (!rdb_channel) {
if (aeCreateFileEvent(server.el, server.rdb_pipe_read, AE_READABLE, rdbPipeReadHandler,NULL) == AE_ERR) { close(rdb_pipe_write); /* close write in parent so that it can detect the close on the child. */
serverPanic("Unrecoverable error creating server.rdb_pipe_read file event."); if (aeCreateFileEvent(server.el, server.rdb_pipe_read, AE_READABLE, rdbPipeReadHandler,NULL) == AE_ERR) {
serverPanic("Unrecoverable error creating server.rdb_pipe_read file event.");
}
} }
} }
close(safe_to_exit_pipe); if (rdb_channel)
zfree(conns);
else
close(safe_to_exit_pipe);
return (childpid == -1) ? C_ERR : C_OK; return (childpid == -1) ? C_ERR : C_OK;
} }
return C_OK; /* Unreached. */ return C_OK; /* Unreached. */

View File

@ -115,12 +115,13 @@ typedef long long ustime_t;
#define REDISMODULE_ZADD_LT (1<<6) #define REDISMODULE_ZADD_LT (1<<6)
/* Hash API flags. */ /* Hash API flags. */
#define REDISMODULE_HASH_NONE 0 #define REDISMODULE_HASH_NONE 0
#define REDISMODULE_HASH_NX (1<<0) #define REDISMODULE_HASH_NX (1<<0)
#define REDISMODULE_HASH_XX (1<<1) #define REDISMODULE_HASH_XX (1<<1)
#define REDISMODULE_HASH_CFIELDS (1<<2) #define REDISMODULE_HASH_CFIELDS (1<<2)
#define REDISMODULE_HASH_EXISTS (1<<3) #define REDISMODULE_HASH_EXISTS (1<<3)
#define REDISMODULE_HASH_COUNT_ALL (1<<4) #define REDISMODULE_HASH_COUNT_ALL (1<<4)
#define REDISMODULE_HASH_EXPIRE_TIME (1<<5)
#define REDISMODULE_CONFIG_DEFAULT 0 /* This is the default for a module config. */ #define REDISMODULE_CONFIG_DEFAULT 0 /* This is the default for a module config. */
#define REDISMODULE_CONFIG_IMMUTABLE (1ULL<<0) /* Can this value only be set at startup? */ #define REDISMODULE_CONFIG_IMMUTABLE (1ULL<<0) /* Can this value only be set at startup? */
@ -131,6 +132,7 @@ typedef long long ustime_t;
#define REDISMODULE_CONFIG_MEMORY (1ULL<<7) /* Indicates if this value can be set as a memory value */ #define REDISMODULE_CONFIG_MEMORY (1ULL<<7) /* Indicates if this value can be set as a memory value */
#define REDISMODULE_CONFIG_BITFLAGS (1ULL<<8) /* Indicates if this value can be set as a multiple enum values */ #define REDISMODULE_CONFIG_BITFLAGS (1ULL<<8) /* Indicates if this value can be set as a multiple enum values */
#define REDISMODULE_CONFIG_UNPREFIXED (1ULL<<9) /* Provided configuration name won't be prefixed with the module name */
/* StreamID type. */ /* StreamID type. */
typedef struct RedisModuleStreamID { typedef struct RedisModuleStreamID {
@ -1082,6 +1084,7 @@ REDISMODULE_API int (*RedisModule_ZsetRangePrev)(RedisModuleKey *key) REDISMODUL
REDISMODULE_API int (*RedisModule_ZsetRangeEndReached)(RedisModuleKey *key) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_ZsetRangeEndReached)(RedisModuleKey *key) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_HashSet)(RedisModuleKey *key, int flags, ...) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_HashSet)(RedisModuleKey *key, int flags, ...) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_HashGet)(RedisModuleKey *key, int flags, ...) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_HashGet)(RedisModuleKey *key, int flags, ...) REDISMODULE_ATTR;
REDISMODULE_API mstime_t (*RedisModule_HashFieldMinExpire)(RedisModuleKey *key) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StreamAdd)(RedisModuleKey *key, int flags, RedisModuleStreamID *id, RedisModuleString **argv, int64_t numfields) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_StreamAdd)(RedisModuleKey *key, int flags, RedisModuleStreamID *id, RedisModuleString **argv, int64_t numfields) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StreamDelete)(RedisModuleKey *key, RedisModuleStreamID *id) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_StreamDelete)(RedisModuleKey *key, RedisModuleStreamID *id) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StreamIteratorStart)(RedisModuleKey *key, int flags, RedisModuleStreamID *startid, RedisModuleStreamID *endid) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_StreamIteratorStart)(RedisModuleKey *key, int flags, RedisModuleStreamID *startid, RedisModuleStreamID *endid) REDISMODULE_ATTR;
@ -1287,6 +1290,7 @@ REDISMODULE_API RedisModuleString * (*RedisModule_GetCurrentUserName)(RedisModul
REDISMODULE_API RedisModuleUser * (*RedisModule_GetModuleUserFromUserName)(RedisModuleString *name) REDISMODULE_ATTR; REDISMODULE_API RedisModuleUser * (*RedisModule_GetModuleUserFromUserName)(RedisModuleString *name) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_ACLCheckCommandPermissions)(RedisModuleUser *user, RedisModuleString **argv, int argc) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_ACLCheckCommandPermissions)(RedisModuleUser *user, RedisModuleString **argv, int argc) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_ACLCheckKeyPermissions)(RedisModuleUser *user, RedisModuleString *key, int flags) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_ACLCheckKeyPermissions)(RedisModuleUser *user, RedisModuleString *key, int flags) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_ACLCheckKeyPrefixPermissions)(RedisModuleUser *user, RedisModuleString *prefix, int flags) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_ACLCheckChannelPermissions)(RedisModuleUser *user, RedisModuleString *ch, int literal) REDISMODULE_ATTR; REDISMODULE_API int (*RedisModule_ACLCheckChannelPermissions)(RedisModuleUser *user, RedisModuleString *ch, int literal) REDISMODULE_ATTR;
REDISMODULE_API void (*RedisModule_ACLAddLogEntry)(RedisModuleCtx *ctx, RedisModuleUser *user, RedisModuleString *object, RedisModuleACLLogEntryReason reason) REDISMODULE_ATTR; REDISMODULE_API void (*RedisModule_ACLAddLogEntry)(RedisModuleCtx *ctx, RedisModuleUser *user, RedisModuleString *object, RedisModuleACLLogEntryReason reason) REDISMODULE_ATTR;
REDISMODULE_API void (*RedisModule_ACLAddLogEntryByUserName)(RedisModuleCtx *ctx, RedisModuleString *user, RedisModuleString *object, RedisModuleACLLogEntryReason reason) REDISMODULE_ATTR; REDISMODULE_API void (*RedisModule_ACLAddLogEntryByUserName)(RedisModuleCtx *ctx, RedisModuleString *user, RedisModuleString *object, RedisModuleACLLogEntryReason reason) REDISMODULE_ATTR;
@ -1451,6 +1455,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int
REDISMODULE_GET_API(ZsetRangeEndReached); REDISMODULE_GET_API(ZsetRangeEndReached);
REDISMODULE_GET_API(HashSet); REDISMODULE_GET_API(HashSet);
REDISMODULE_GET_API(HashGet); REDISMODULE_GET_API(HashGet);
REDISMODULE_GET_API(HashFieldMinExpire);
REDISMODULE_GET_API(StreamAdd); REDISMODULE_GET_API(StreamAdd);
REDISMODULE_GET_API(StreamDelete); REDISMODULE_GET_API(StreamDelete);
REDISMODULE_GET_API(StreamIteratorStart); REDISMODULE_GET_API(StreamIteratorStart);
@ -1656,6 +1661,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int
REDISMODULE_GET_API(GetModuleUserFromUserName); REDISMODULE_GET_API(GetModuleUserFromUserName);
REDISMODULE_GET_API(ACLCheckCommandPermissions); REDISMODULE_GET_API(ACLCheckCommandPermissions);
REDISMODULE_GET_API(ACLCheckKeyPermissions); REDISMODULE_GET_API(ACLCheckKeyPermissions);
REDISMODULE_GET_API(ACLCheckKeyPrefixPermissions);
REDISMODULE_GET_API(ACLCheckChannelPermissions); REDISMODULE_GET_API(ACLCheckChannelPermissions);
REDISMODULE_GET_API(ACLAddLogEntry); REDISMODULE_GET_API(ACLAddLogEntry);
REDISMODULE_GET_API(ACLAddLogEntryByUserName); REDISMODULE_GET_API(ACLAddLogEntryByUserName);

File diff suppressed because it is too large Load Diff

174
src/rio.c
View File

@ -1,3 +1,16 @@
/*
* Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved.
*
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
*
* Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
*/
/* rio.c is a simple stream-oriented I/O abstraction that provides an interface /* rio.c is a simple stream-oriented I/O abstraction that provides an interface
* to write code that can consume/produce data using different concrete input * to write code that can consume/produce data using different concrete input
* and output devices. For instance the same rdb.c code using the rio * and output devices. For instance the same rdb.c code using the rio
@ -14,34 +27,6 @@
* for the current checksum. * for the current checksum.
* *
* ---------------------------------------------------------------------------- * ----------------------------------------------------------------------------
*
* Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
* Copyright (c) 2009-current, Redis Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/ */
@ -429,6 +414,139 @@ void rioFreeFd(rio *r) {
sdsfree(r->io.fd.buf); sdsfree(r->io.fd.buf);
} }
/* ------------------- Connection set implementation ------------------
* This target is used to write the RDB file to a set of replica connections as
* part of rdb channel replication. */
/* Returns 1 for success, 0 for failure.
* The function returns success as long as we are able to correctly write
* to at least one file descriptor.
*
* When buf is NULL or len is 0, the function performs a flush operation if
* there is some pending buffer, so this function is also used in order to
* implement rioConnsetFlush(). */
static size_t rioConnsetWrite(rio *r, const void *buf, size_t len) {
const size_t pre_flush_size = 256 * 1024;
unsigned char *p = (unsigned char*) buf;
size_t buflen = len;
size_t failed = 0; /* number of connections that write() returned error. */
/* For small writes, we rather keep the data in user-space buffer, and flush
* it only when it grows. however for larger writes, we prefer to flush
* any pre-existing buffer, and write the new one directly without reallocs
* and memory copying. */
if (len > pre_flush_size) {
rioConnsetWrite(r, NULL, 0);
} else {
if (buf && len) {
r->io.connset.buf = sdscatlen(r->io.connset.buf, buf, len);
if (sdslen(r->io.connset.buf) <= PROTO_IOBUF_LEN)
return 1;
}
p = (unsigned char *)r->io.connset.buf;
buflen = sdslen(r->io.connset.buf);
}
while (buflen > 0) {
/* Write in little chunks so that when there are big writes we
* parallelize while the kernel is sending data in background to the
* TCP socket. */
size_t limit = PROTO_IOBUF_LEN * 2;
size_t count = buflen < limit ? buflen : limit;
for (size_t i = 0; i < r->io.connset.n_dst; i++) {
size_t n_written = 0;
if (r->io.connset.dst[i].failed != 0) {
failed++;
continue; /* Skip failed connections. */
}
do {
ssize_t ret;
connection *c = r->io.connset.dst[i].conn;
ret = connWrite(c, p + n_written, count - n_written);
if (ret <= 0) {
if (errno == 0)
errno = EIO;
/* With blocking sockets, which is the sole user of this
* rio target, EWOULDBLOCK is returned only because of
* the SO_SNDTIMEO socket option, so we translate the error
* into one more recognizable by the user. */
if (ret == -1 && errno == EWOULDBLOCK)
errno = ETIMEDOUT;
r->io.connset.dst[i].failed = 1;
break;
}
n_written += ret;
} while (n_written != count);
}
if (failed == r->io.connset.n_dst)
return 0; /* All the connections have failed. */
p += count;
buflen -= count;
r->io.connset.pos += count;
}
sdsclear(r->io.connset.buf);
return 1;
}
/* Returns 1 or 0 for success/failure. */
static size_t rioConnsetRead(rio *r, void *buf, size_t len) {
UNUSED(r);
UNUSED(buf);
UNUSED(len);
return 0; /* Error, this target does not support reading. */
}
/* Returns the number of sent bytes. */
static off_t rioConnsetTell(rio *r) {
return r->io.connset.pos;
}
/* Flushes any buffer to target device if applicable. Returns 1 on success
* and 0 on failures. */
static int rioConnsetFlush(rio *r) {
/* Our flush is implemented by the write method, that recognizes a
* buffer set to NULL with a count of zero as a flush request. */
return rioConnsetWrite(r, NULL, 0);
}
static const rio rioConnsetIO = {
rioConnsetRead,
rioConnsetWrite,
rioConnsetTell,
rioConnsetFlush,
NULL, /* update_checksum */
0, /* current checksum */
0, /* flags */
0, /* bytes read or written */
0, /* read/write chunk size */
{ { NULL, 0 } } /* union for io-specific vars */
};
void rioInitWithConnset(rio *r, connection **conns, size_t n_conns) {
*r = rioConnsetIO;
r->io.connset.dst = zcalloc(sizeof(*r->io.connset.dst) * n_conns);
r->io.connset.n_dst = n_conns;
r->io.connset.pos = 0;
r->io.connset.buf = sdsempty();
for (size_t i = 0; i < n_conns; i++)
r->io.connset.dst[i].conn = conns[i];
}
/* release the rio stream. */
void rioFreeConnset(rio *r) {
zfree(r->io.connset.dst);
sdsfree(r->io.connset.buf);
}
/* ---------------------------- Generic functions ---------------------------- */ /* ---------------------------- Generic functions ---------------------------- */
/* This function can be installed both in memory and file streams when checksum /* This function can be installed both in memory and file streams when checksum

View File

@ -1,31 +1,14 @@
/* /*
* Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com> * Copyright (c) 2009-Present, Redis Ltd.
* Copyright (c) 2009-current, Redis Ltd.
* All rights reserved. * All rights reserved.
* *
* Redistribution and use in source and binary forms, with or without * Copyright (c) 2024-present, Valkey contributors.
* modification, are permitted provided that the following conditions are met: * All rights reserved.
* *
* * Redistributions of source code must retain the above copyright notice, * Licensed under your choice of the Redis Source Available License 2.0
* this list of conditions and the following disclaimer. * (RSALv2) or the Server Side Public License v1 (SSPLv1).
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
* *
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/ */
@ -39,6 +22,7 @@
#define RIO_FLAG_READ_ERROR (1<<0) #define RIO_FLAG_READ_ERROR (1<<0)
#define RIO_FLAG_WRITE_ERROR (1<<1) #define RIO_FLAG_WRITE_ERROR (1<<1)
#define RIO_FLAG_ABORT (1<<2)
#define RIO_TYPE_FILE (1<<0) #define RIO_TYPE_FILE (1<<0)
#define RIO_TYPE_BUFFER (1<<1) #define RIO_TYPE_BUFFER (1<<1)
@ -97,6 +81,17 @@ struct _rio {
off_t pos; off_t pos;
sds buf; sds buf;
} fd; } fd;
/* Multiple connections target (used to write to N sockets). */
struct {
struct {
connection *conn; /* Connection */
int failed; /* If write failed on this connection. */
} *dst;
size_t n_dst; /* Number of connections */
off_t pos; /* Number of sent bytes */
sds buf;
} connset;
} io; } io;
}; };
@ -107,7 +102,7 @@ typedef struct _rio rio;
* if needed. */ * if needed. */
static inline size_t rioWrite(rio *r, const void *buf, size_t len) { static inline size_t rioWrite(rio *r, const void *buf, size_t len) {
if (r->flags & RIO_FLAG_WRITE_ERROR) return 0; if (r->flags & (RIO_FLAG_WRITE_ERROR | RIO_FLAG_ABORT)) return 0;
while (len) { while (len) {
size_t bytes_to_write = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len; size_t bytes_to_write = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len;
if (r->update_cksum) r->update_cksum(r,buf,bytes_to_write); if (r->update_cksum) r->update_cksum(r,buf,bytes_to_write);
@ -123,7 +118,7 @@ static inline size_t rioWrite(rio *r, const void *buf, size_t len) {
} }
static inline size_t rioRead(rio *r, void *buf, size_t len) { static inline size_t rioRead(rio *r, void *buf, size_t len) {
if (r->flags & RIO_FLAG_READ_ERROR) return 0; if (r->flags & (RIO_FLAG_READ_ERROR | RIO_FLAG_ABORT)) return 0;
while (len) { while (len) {
size_t bytes_to_read = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len; size_t bytes_to_read = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len;
if (r->read(r,buf,bytes_to_read) == 0) { if (r->read(r,buf,bytes_to_read) == 0) {
@ -146,6 +141,10 @@ static inline int rioFlush(rio *r) {
return r->flush(r); return r->flush(r);
} }
static inline void rioAbort(rio *r) {
r->flags |= RIO_FLAG_ABORT;
}
/* This function allows to know if there was a read error in any past /* This function allows to know if there was a read error in any past
* operation, since the rio stream was created or since the last call * operation, since the rio stream was created or since the last call
* to rioClearError(). */ * to rioClearError(). */
@ -159,16 +158,18 @@ static inline int rioGetWriteError(rio *r) {
} }
static inline void rioClearErrors(rio *r) { static inline void rioClearErrors(rio *r) {
r->flags &= ~(RIO_FLAG_READ_ERROR|RIO_FLAG_WRITE_ERROR); r->flags &= ~(RIO_FLAG_READ_ERROR|RIO_FLAG_WRITE_ERROR|RIO_FLAG_ABORT);
} }
void rioInitWithFile(rio *r, FILE *fp); void rioInitWithFile(rio *r, FILE *fp);
void rioInitWithBuffer(rio *r, sds s); void rioInitWithBuffer(rio *r, sds s);
void rioInitWithConn(rio *r, connection *conn, size_t read_limit); void rioInitWithConn(rio *r, connection *conn, size_t read_limit);
void rioInitWithFd(rio *r, int fd); void rioInitWithFd(rio *r, int fd);
void rioInitWithConnset(rio *r, connection **conns, size_t n_conns);
void rioFreeFd(rio *r); void rioFreeFd(rio *r);
void rioFreeConn(rio *r, sds* out_remainingBufferedData); void rioFreeConn(rio *r, sds* out_remainingBufferedData);
void rioFreeConnset(rio *r);
size_t rioWriteBulkCount(rio *r, char prefix, long count); size_t rioWriteBulkCount(rio *r, char prefix, long count);
size_t rioWriteBulkString(rio *r, const char *buf, size_t len); size_t rioWriteBulkString(rio *r, const char *buf, size_t len);

View File

@ -64,7 +64,7 @@ lua_State *createLuaState(void) {
size_t sz = sizeof(unsigned int); size_t sz = sizeof(unsigned int);
int err = je_mallctl("tcache.create", (void *)&tcache, &sz, NULL, 0); int err = je_mallctl("tcache.create", (void *)&tcache, &sz, NULL, 0);
if (err) { if (err) {
serverLog(LL_WARNING, "Failed creating the lua jemalloc tcache."); serverLog(LL_WARNING, "Failed creating the lua jemalloc tcache (err=%d).", err);
exit(1); exit(1);
} }
@ -79,7 +79,7 @@ void luaEnvInit(void) {
size_t sz = sizeof(unsigned int); size_t sz = sizeof(unsigned int);
int err = je_mallctl("arenas.create", (void *)&arena, &sz, NULL, 0); int err = je_mallctl("arenas.create", (void *)&arena, &sz, NULL, 0);
if (err) { if (err) {
serverLog(LL_WARNING, "Failed creating the lua jemalloc arena."); serverLog(LL_WARNING, "Failed creating the lua jemalloc arena (err=%d).", err);
exit(1); exit(1);
} }
server.lua_arena = arena; server.lua_arena = arena;

View File

@ -174,6 +174,11 @@ void sdsfree(sds s) {
s_free((char*)s-sdsHdrSize(s[-1])); s_free((char*)s-sdsHdrSize(s[-1]));
} }
/* Generic version of sdsfree. */
void sdsfreegeneric(void *s) {
sdsfree((sds)s);
}
/* Set the sds string length to the length as obtained with strlen(), so /* Set the sds string length to the length as obtained with strlen(), so
* considering as content only up to the first null term character. * considering as content only up to the first null term character.
* *
@ -1443,29 +1448,29 @@ int sdsTest(int argc, char **argv, int flags) {
/* Test sdsresize - extend */ /* Test sdsresize - extend */
x = sdsnew("1234567890123456789012345678901234567890"); x = sdsnew("1234567890123456789012345678901234567890");
x = sdsResize(x, 200, 1); x = sdsResize(x, 200, 1);
test_cond("sdsrezie() expand len", sdslen(x) == 40); test_cond("sdsresize() expand len", sdslen(x) == 40);
test_cond("sdsrezie() expand strlen", strlen(x) == 40); test_cond("sdsresize() expand strlen", strlen(x) == 40);
test_cond("sdsrezie() expand alloc", sdsalloc(x) == 200); test_cond("sdsresize() expand alloc", sdsalloc(x) == 200);
/* Test sdsresize - trim free space */ /* Test sdsresize - trim free space */
x = sdsResize(x, 80, 1); x = sdsResize(x, 80, 1);
test_cond("sdsrezie() shrink len", sdslen(x) == 40); test_cond("sdsresize() shrink len", sdslen(x) == 40);
test_cond("sdsrezie() shrink strlen", strlen(x) == 40); test_cond("sdsresize() shrink strlen", strlen(x) == 40);
test_cond("sdsrezie() shrink alloc", sdsalloc(x) == 80); test_cond("sdsresize() shrink alloc", sdsalloc(x) == 80);
/* Test sdsresize - crop used space */ /* Test sdsresize - crop used space */
x = sdsResize(x, 30, 1); x = sdsResize(x, 30, 1);
test_cond("sdsrezie() crop len", sdslen(x) == 30); test_cond("sdsresize() crop len", sdslen(x) == 30);
test_cond("sdsrezie() crop strlen", strlen(x) == 30); test_cond("sdsresize() crop strlen", strlen(x) == 30);
test_cond("sdsrezie() crop alloc", sdsalloc(x) == 30); test_cond("sdsresize() crop alloc", sdsalloc(x) == 30);
/* Test sdsresize - extend to different class */ /* Test sdsresize - extend to different class */
x = sdsResize(x, 400, 1); x = sdsResize(x, 400, 1);
test_cond("sdsrezie() expand len", sdslen(x) == 30); test_cond("sdsresize() expand len", sdslen(x) == 30);
test_cond("sdsrezie() expand strlen", strlen(x) == 30); test_cond("sdsresize() expand strlen", strlen(x) == 30);
test_cond("sdsrezie() expand alloc", sdsalloc(x) == 400); test_cond("sdsresize() expand alloc", sdsalloc(x) == 400);
/* Test sdsresize - shrink to different class */ /* Test sdsresize - shrink to different class */
x = sdsResize(x, 4, 1); x = sdsResize(x, 4, 1);
test_cond("sdsrezie() crop len", sdslen(x) == 4); test_cond("sdsresize() crop len", sdslen(x) == 4);
test_cond("sdsrezie() crop strlen", strlen(x) == 4); test_cond("sdsresize() crop strlen", strlen(x) == 4);
test_cond("sdsrezie() crop alloc", sdsalloc(x) == 4); test_cond("sdsresize() crop alloc", sdsalloc(x) == 4);
sdsfree(x); sdsfree(x);
} }
return 0; return 0;

View File

@ -198,6 +198,7 @@ sds sdsnew(const char *init);
sds sdsempty(void); sds sdsempty(void);
sds sdsdup(const sds s); sds sdsdup(const sds s);
void sdsfree(sds s); void sdsfree(sds s);
void sdsfreegeneric(void *s);
sds sdsgrowzero(sds s, size_t len); sds sdsgrowzero(sds s, size_t len);
sds sdscatlen(sds s, const void *t, size_t len); sds sdscatlen(sds s, const void *t, size_t len);
sds sdscat(sds s, const char *t); sds sdscat(sds s, const char *t);

View File

@ -521,7 +521,8 @@ dictType commandTableDictType = {
dictSdsKeyCaseCompare, /* key compare */ dictSdsKeyCaseCompare, /* key compare */
dictSdsDestructor, /* key destructor */ dictSdsDestructor, /* key destructor */
NULL, /* val destructor */ NULL, /* val destructor */
NULL /* allow to expand */ NULL, /* allow to expand */
.force_full_rehash = 1, /* force full rehashing */
}; };
/* Hash type hash table (note that small hashes are represented with listpacks) */ /* Hash type hash table (note that small hashes are represented with listpacks) */
@ -636,7 +637,8 @@ dictType clientDictType = {
NULL, /* key dup */ NULL, /* key dup */
NULL, /* val dup */ NULL, /* val dup */
dictClientKeyCompare, /* key compare */ dictClientKeyCompare, /* key compare */
.no_value = 1 /* no values in this dict */ .no_value = 1, /* no values in this dict */
.keys_are_odd = 0 /* a client pointer is not an odd pointer */
}; };
/* This function is called once a background process of some kind terminates, /* This function is called once a background process of some kind terminates,
@ -784,6 +786,23 @@ int clientsCronResizeQueryBuffer(client *c) {
return 0; return 0;
} }
/* If the client has been idle for too long, free the client's arguments. */
int clientsCronFreeArgvIfIdle(client *c) {
/* If the client is in the middle of parsing a command, or if argv is in use
* (e.g. parsed in the IO thread but not yet executed, or blocked), exit ASAP. */
if (!c->argv || c->multibulklen || c->argc) return 0;
/* Free argv if the client has been idle for more than 2 seconds or if argv
* size is too large. */
time_t idletime = server.unixtime - c->lastinteraction;
if (idletime > 2 || c->argv_len > 128) {
c->argv_len = 0;
zfree(c->argv);
c->argv = NULL;
}
return 0;
}
/* The client output buffer can be adjusted to better fit the memory requirements. /* The client output buffer can be adjusted to better fit the memory requirements.
* *
* the logic is: * the logic is:
@ -949,7 +968,7 @@ void removeClientFromMemUsageBucket(client *c, int allow_eviction) {
* returns 1 if client eviction for this client is allowed, 0 otherwise. * returns 1 if client eviction for this client is allowed, 0 otherwise.
*/ */
int updateClientMemUsageAndBucket(client *c) { int updateClientMemUsageAndBucket(client *c) {
serverAssert(io_threads_op == IO_THREADS_OP_IDLE && c->conn); serverAssert(pthread_equal(pthread_self(), server.main_thread_id) && c->conn);
int allow_eviction = clientEvictionAllowed(c); int allow_eviction = clientEvictionAllowed(c);
removeClientFromMemUsageBucket(c, allow_eviction); removeClientFromMemUsageBucket(c, allow_eviction);
@ -1001,6 +1020,7 @@ void getExpansiveClientsInfo(size_t *in_usage, size_t *out_usage) {
* default server.hz value is 10, so sometimes here we need to process thousands * default server.hz value is 10, so sometimes here we need to process thousands
* of clients per second, turning this function into a source of latency. * of clients per second, turning this function into a source of latency.
*/ */
#define CLIENTS_CRON_PAUSE_IOTHREAD 8
#define CLIENTS_CRON_MIN_ITERATIONS 5 #define CLIENTS_CRON_MIN_ITERATIONS 5
void clientsCron(void) { void clientsCron(void) {
/* Try to process at least numclients/server.hz of clients /* Try to process at least numclients/server.hz of clients
@ -1035,6 +1055,15 @@ void clientsCron(void) {
ClientsPeakMemInput[zeroidx] = 0; ClientsPeakMemInput[zeroidx] = 0;
ClientsPeakMemOutput[zeroidx] = 0; ClientsPeakMemOutput[zeroidx] = 0;
/* Pause the IO threads that are processing clients, to let us access clients
* safely. In order to avoid increasing CPU usage by pausing all threads when
* there are too many io threads, we pause io threads in multiple batches. */
static int start = 1, end = 0;
if (server.io_threads_num >= 1 && listLength(server.clients) > 0) {
end = start + CLIENTS_CRON_PAUSE_IOTHREAD - 1;
if (end >= server.io_threads_num) end = server.io_threads_num - 1;
pauseIOThreadsRange(start, end);
}
while(listLength(server.clients) && iterations--) { while(listLength(server.clients) && iterations--) {
client *c; client *c;
@ -1045,11 +1074,21 @@ void clientsCron(void) {
head = listFirst(server.clients); head = listFirst(server.clients);
c = listNodeValue(head); c = listNodeValue(head);
listRotateHeadToTail(server.clients); listRotateHeadToTail(server.clients);
if (c->running_tid != IOTHREAD_MAIN_THREAD_ID &&
!(c->running_tid >= start && c->running_tid <= end))
{
/* Skip clients that are being processed by the IO threads that
* are not paused. */
continue;
}
/* The following functions do different service checks on the client. /* The following functions do different service checks on the client.
* The protocol is that they return non-zero if the client was * The protocol is that they return non-zero if the client was
* terminated. */ * terminated. */
if (clientsCronHandleTimeout(c,now)) continue; if (clientsCronHandleTimeout(c,now)) continue;
if (clientsCronResizeQueryBuffer(c)) continue; if (clientsCronResizeQueryBuffer(c)) continue;
if (clientsCronFreeArgvIfIdle(c)) continue;
if (clientsCronResizeOutputBuffer(c,now)) continue; if (clientsCronResizeOutputBuffer(c,now)) continue;
if (clientsCronTrackExpansiveClients(c, curr_peak_mem_usage_slot)) continue; if (clientsCronTrackExpansiveClients(c, curr_peak_mem_usage_slot)) continue;
@ -1065,6 +1104,14 @@ void clientsCron(void) {
if (closeClientOnOutputBufferLimitReached(c, 0)) continue; if (closeClientOnOutputBufferLimitReached(c, 0)) continue;
} }
/* Resume the IO threads that were paused */
if (end) {
resumeIOThreadsRange(start, end);
start = end + 1;
if (start >= server.io_threads_num) start = 1;
end = 0;
}
} }
/* This function handles 'background' operations we are required to do /* This function handles 'background' operations we are required to do
@ -1397,7 +1444,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
serverLog(LL_DEBUG, serverLog(LL_DEBUG,
"%lu clients connected (%lu replicas), %zu bytes in use", "%lu clients connected (%lu replicas), %zu bytes in use",
listLength(server.clients)-listLength(server.slaves), listLength(server.clients)-listLength(server.slaves),
listLength(server.slaves), replicationLogicalReplicaCount(),
zmalloc_used_memory()); zmalloc_used_memory());
} }
} }
@ -1513,9 +1560,6 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
migrateCloseTimedoutSockets(); migrateCloseTimedoutSockets();
} }
/* Stop the I/O threads if we don't have enough pending work. */
stopThreadedIOIfNeeded();
/* Resize tracking keys table if needed. This is also done at every /* Resize tracking keys table if needed. This is also done at every
* command execution, but we want to be sure that if the last command * command execution, but we want to be sure that if the last command
* executed changes the value via CONFIG SET, the server will perform * executed changes the value via CONFIG SET, the server will perform
@ -1667,24 +1711,28 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
* events to handle. */ * events to handle. */
if (ProcessingEventsWhileBlocked) { if (ProcessingEventsWhileBlocked) {
uint64_t processed = 0; uint64_t processed = 0;
processed += handleClientsWithPendingReadsUsingThreads(); processed += connTypeProcessPendingData(server.el);
processed += connTypeProcessPendingData();
if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE) if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE)
flushAppendOnlyFile(0); flushAppendOnlyFile(0);
processed += handleClientsWithPendingWrites(); processed += handleClientsWithPendingWrites();
processed += freeClientsInAsyncFreeQueue(); processed += freeClientsInAsyncFreeQueue();
/* Let the clients after the blocking call be processed. */
processClientsOfAllIOThreads();
/* New connections may have been established while blocked, clients from
* IO thread may have replies to write, ensure they are promptly sent to
* IO threads. */
processed += sendPendingClientsToIOThreads();
server.events_processed_while_blocked += processed; server.events_processed_while_blocked += processed;
return; return;
} }
/* We should handle pending reads clients ASAP after event loop. */
handleClientsWithPendingReadsUsingThreads();
/* Handle pending data(typical TLS). (must be done before flushAppendOnlyFile) */ /* Handle pending data(typical TLS). (must be done before flushAppendOnlyFile) */
connTypeProcessPendingData(); connTypeProcessPendingData(server.el);
/* If any connection type(typical TLS) still has pending unread data don't sleep at all. */ /* If any connection type(typical TLS) still has pending unread data don't sleep at all. */
int dont_sleep = connTypeHasPendingData(); int dont_sleep = connTypeHasPendingData(server.el);
/* Call the Redis Cluster before sleep function. Note that this function /* Call the Redis Cluster before sleep function. Note that this function
* may change the state of Redis Cluster (from ok to fail or vice versa), * may change the state of Redis Cluster (from ok to fail or vice versa),
@ -1750,8 +1798,8 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
long long prev_fsynced_reploff = server.fsynced_reploff; long long prev_fsynced_reploff = server.fsynced_reploff;
/* Write the AOF buffer on disk, /* Write the AOF buffer on disk,
* must be done before handleClientsWithPendingWritesUsingThreads, * must be done before handleClientsWithPendingWrites and
* in case of appendfsync=always. */ * sendPendingClientsToIOThreads, in case of appendfsync=always. */
if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE) if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE)
flushAppendOnlyFile(0); flushAppendOnlyFile(0);
@ -1773,7 +1821,10 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
} }
/* Handle writes with pending output buffers. */ /* Handle writes with pending output buffers. */
handleClientsWithPendingWritesUsingThreads(); handleClientsWithPendingWrites();
/* Let io thread to handle its pending clients. */
sendPendingClientsToIOThreads();
/* Record cron time in beforeSleep. This does not include the time consumed by AOF writing and IO writing above. */ /* Record cron time in beforeSleep. This does not include the time consumed by AOF writing and IO writing above. */
monotime cron_start_time_after_write = getMonotonicUs(); monotime cron_start_time_after_write = getMonotonicUs();
@ -2102,6 +2153,7 @@ void initServerConfig(void) {
memset(server.blocked_clients_by_type,0, memset(server.blocked_clients_by_type,0,
sizeof(server.blocked_clients_by_type)); sizeof(server.blocked_clients_by_type));
server.shutdown_asap = 0; server.shutdown_asap = 0;
server.crashing = 0;
server.shutdown_flags = 0; server.shutdown_flags = 0;
server.shutdown_mstime = 0; server.shutdown_mstime = 0;
server.cluster_module_flags = CLUSTER_MODULE_FLAG_NONE; server.cluster_module_flags = CLUSTER_MODULE_FLAG_NONE;
@ -2131,6 +2183,8 @@ void initServerConfig(void) {
server.cached_master = NULL; server.cached_master = NULL;
server.master_initial_offset = -1; server.master_initial_offset = -1;
server.repl_state = REPL_STATE_NONE; server.repl_state = REPL_STATE_NONE;
server.repl_rdb_ch_state = REPL_RDB_CH_STATE_NONE;
server.repl_full_sync_buffer = (struct replDataBuf) {0};
server.repl_transfer_tmpfile = NULL; server.repl_transfer_tmpfile = NULL;
server.repl_transfer_fd = -1; server.repl_transfer_fd = -1;
server.repl_transfer_s = NULL; server.repl_transfer_s = NULL;
@ -2568,10 +2622,10 @@ void resetServerStats(void) {
server.stat_sync_full = 0; server.stat_sync_full = 0;
server.stat_sync_partial_ok = 0; server.stat_sync_partial_ok = 0;
server.stat_sync_partial_err = 0; server.stat_sync_partial_err = 0;
server.stat_io_reads_processed = 0; for (j = 0; j < IO_THREADS_MAX_NUM; j++) {
atomicSet(server.stat_total_reads_processed, 0); atomicSet(server.stat_io_reads_processed[j], 0);
server.stat_io_writes_processed = 0; atomicSet(server.stat_io_writes_processed[j], 0);
atomicSet(server.stat_total_writes_processed, 0); }
atomicSet(server.stat_client_qbuf_limit_disconnections, 0); atomicSet(server.stat_client_qbuf_limit_disconnections, 0);
server.stat_client_outbuf_limit_disconnections = 0; server.stat_client_outbuf_limit_disconnections = 0;
for (j = 0; j < STATS_METRIC_COUNT; j++) { for (j = 0; j < STATS_METRIC_COUNT; j++) {
@ -2627,6 +2681,8 @@ void initServer(void) {
server.hz = server.config_hz; server.hz = server.config_hz;
server.pid = getpid(); server.pid = getpid();
server.in_fork_child = CHILD_TYPE_NONE; server.in_fork_child = CHILD_TYPE_NONE;
server.rdb_pipe_read = -1;
server.rdb_child_exit_pipe = -1;
server.main_thread_id = pthread_self(); server.main_thread_id = pthread_self();
server.current_client = NULL; server.current_client = NULL;
server.errors = raxNew(); server.errors = raxNew();
@ -2690,7 +2746,7 @@ void initServer(void) {
flags |= KVSTORE_FREE_EMPTY_DICTS; flags |= KVSTORE_FREE_EMPTY_DICTS;
} }
for (j = 0; j < server.dbnum; j++) { for (j = 0; j < server.dbnum; j++) {
server.db[j].keys = kvstoreCreate(&dbDictType, slot_count_bits, flags); server.db[j].keys = kvstoreCreate(&dbDictType, slot_count_bits, flags | KVSTORE_ALLOC_META_KEYS_HIST);
server.db[j].expires = kvstoreCreate(&dbExpiresDictType, slot_count_bits, flags); server.db[j].expires = kvstoreCreate(&dbExpiresDictType, slot_count_bits, flags);
server.db[j].hexpires = ebCreate(); server.db[j].hexpires = ebCreate();
server.db[j].expires_cursor = 0; server.db[j].expires_cursor = 0;
@ -2701,7 +2757,7 @@ void initServer(void) {
server.db[j].id = j; server.db[j].id = j;
server.db[j].avg_ttl = 0; server.db[j].avg_ttl = 0;
server.db[j].defrag_later = listCreate(); server.db[j].defrag_later = listCreate();
listSetFreeMethod(server.db[j].defrag_later,(void (*)(void*))sdsfree); listSetFreeMethod(server.db[j].defrag_later, sdsfreegeneric);
} }
evictionPoolAlloc(); /* Initialize the LRU keys pool. */ evictionPoolAlloc(); /* Initialize the LRU keys pool. */
/* Note that server.pubsub_channels was chosen to be a kvstore (with only one dict, which /* Note that server.pubsub_channels was chosen to be a kvstore (with only one dict, which
@ -2763,6 +2819,7 @@ void initServer(void) {
server.aof_last_write_errno = 0; server.aof_last_write_errno = 0;
server.repl_good_slaves_count = 0; server.repl_good_slaves_count = 0;
server.last_sig_received = 0; server.last_sig_received = 0;
memset(server.io_threads_clients_num, 0, sizeof(server.io_threads_clients_num));
/* Initiate acl info struct */ /* Initiate acl info struct */
server.acl_info.invalid_cmd_accesses = 0; server.acl_info.invalid_cmd_accesses = 0;
@ -3920,12 +3977,6 @@ int processCommand(client *c) {
reqresAppendRequest(c); reqresAppendRequest(c);
} }
/* Handle possible security attacks. */
if (!strcasecmp(c->argv[0]->ptr,"host:") || !strcasecmp(c->argv[0]->ptr,"post")) {
securityWarningCommand(c);
return C_ERR;
}
/* If we're inside a module blocked context yielding that wants to avoid /* If we're inside a module blocked context yielding that wants to avoid
* processing clients, postpone the command. */ * processing clients, postpone the command. */
if (server.busy_module_yield_flags != BUSY_MODULE_YIELD_NONE && if (server.busy_module_yield_flags != BUSY_MODULE_YIELD_NONE &&
@ -3940,7 +3991,15 @@ int processCommand(client *c) {
* In case we are reprocessing a command after it was blocked, * In case we are reprocessing a command after it was blocked,
* we do not have to repeat the same checks */ * we do not have to repeat the same checks */
if (!client_reprocessing_command) { if (!client_reprocessing_command) {
c->cmd = c->lastcmd = c->realcmd = lookupCommand(c->argv,c->argc); struct redisCommand *cmd = c->iolookedcmd ? c->iolookedcmd : lookupCommand(c->argv, c->argc);
if (!cmd) {
/* Handle possible security attacks. */
if (!strcasecmp(c->argv[0]->ptr,"host:") || !strcasecmp(c->argv[0]->ptr,"post")) {
securityWarningCommand(c);
return C_ERR;
}
}
c->cmd = c->lastcmd = c->realcmd = cmd;
sds err; sds err;
if (!commandCheckExistence(c, &err)) { if (!commandCheckExistence(c, &err)) {
rejectCommandSds(c, err); rejectCommandSds(c, err);
@ -5403,7 +5462,10 @@ const char *replstateToString(int replstate) {
switch (replstate) { switch (replstate) {
case SLAVE_STATE_WAIT_BGSAVE_START: case SLAVE_STATE_WAIT_BGSAVE_START:
case SLAVE_STATE_WAIT_BGSAVE_END: case SLAVE_STATE_WAIT_BGSAVE_END:
case SLAVE_STATE_WAIT_RDB_CHANNEL:
return "wait_bgsave"; return "wait_bgsave";
case SLAVE_STATE_SEND_BULK_AND_STREAM:
return "send_bulk_and_stream";
case SLAVE_STATE_SEND_BULK: case SLAVE_STATE_SEND_BULK:
return "send_bulk"; return "send_bulk";
case SLAVE_STATE_ONLINE: case SLAVE_STATE_ONLINE:
@ -5520,8 +5582,8 @@ void releaseInfoSectionDict(dict *sec) {
* The resulting dictionary should be released with releaseInfoSectionDict. */ * The resulting dictionary should be released with releaseInfoSectionDict. */
dict *genInfoSectionDict(robj **argv, int argc, char **defaults, int *out_all, int *out_everything) { dict *genInfoSectionDict(robj **argv, int argc, char **defaults, int *out_all, int *out_everything) {
char *default_sections[] = { char *default_sections[] = {
"server", "clients", "memory", "persistence", "stats", "replication", "server", "clients", "memory", "persistence", "stats", "replication", "threads",
"cpu", "module_list", "errorstats", "cluster", "keyspace", NULL}; "cpu", "module_list", "errorstats", "cluster", "keyspace", "keysizes", NULL};
if (!defaults) if (!defaults)
defaults = default_sections; defaults = default_sections;
@ -5686,8 +5748,8 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
size_t zmalloc_used = zmalloc_used_memory(); size_t zmalloc_used = zmalloc_used_memory();
size_t total_system_mem = server.system_memory_size; size_t total_system_mem = server.system_memory_size;
const char *evict_policy = evictPolicyToString(); const char *evict_policy = evictPolicyToString();
long long memory_lua = evalMemory(); long long memory_lua = evalScriptsMemoryVM();
long long memory_functions = functionsMemory(); long long memory_functions = functionsMemoryVM();
struct redisMemOverhead *mh = getMemoryOverheadData(); struct redisMemOverhead *mh = getMemoryOverheadData();
/* Peak memory is updated from time to time by serverCron() so it /* Peak memory is updated from time to time by serverCron() so it
@ -5702,7 +5764,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
bytesToHuman(total_system_hmem,sizeof(total_system_hmem),total_system_mem); bytesToHuman(total_system_hmem,sizeof(total_system_hmem),total_system_mem);
bytesToHuman(used_memory_lua_hmem,sizeof(used_memory_lua_hmem),memory_lua); bytesToHuman(used_memory_lua_hmem,sizeof(used_memory_lua_hmem),memory_lua);
bytesToHuman(used_memory_vm_total_hmem,sizeof(used_memory_vm_total_hmem),memory_functions + memory_lua); bytesToHuman(used_memory_vm_total_hmem,sizeof(used_memory_vm_total_hmem),memory_functions + memory_lua);
bytesToHuman(used_memory_scripts_hmem,sizeof(used_memory_scripts_hmem),mh->lua_caches + mh->functions_caches); bytesToHuman(used_memory_scripts_hmem,sizeof(used_memory_scripts_hmem),mh->eval_caches + mh->functions_caches);
bytesToHuman(used_memory_rss_hmem,sizeof(used_memory_rss_hmem),server.cron_malloc_stats.process_rss); bytesToHuman(used_memory_rss_hmem,sizeof(used_memory_rss_hmem),server.cron_malloc_stats.process_rss);
bytesToHuman(maxmemory_hmem,sizeof(maxmemory_hmem),server.maxmemory); bytesToHuman(maxmemory_hmem,sizeof(maxmemory_hmem),server.maxmemory);
@ -5728,7 +5790,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
"used_memory_lua:%lld\r\n", memory_lua, /* deprecated, renamed to used_memory_vm_eval */ "used_memory_lua:%lld\r\n", memory_lua, /* deprecated, renamed to used_memory_vm_eval */
"used_memory_vm_eval:%lld\r\n", memory_lua, "used_memory_vm_eval:%lld\r\n", memory_lua,
"used_memory_lua_human:%s\r\n", used_memory_lua_hmem, /* deprecated */ "used_memory_lua_human:%s\r\n", used_memory_lua_hmem, /* deprecated */
"used_memory_scripts_eval:%lld\r\n", (long long)mh->lua_caches, "used_memory_scripts_eval:%lld\r\n", (long long)mh->eval_caches,
"number_of_cached_scripts:%lu\r\n", dictSize(evalScriptsDict()), "number_of_cached_scripts:%lu\r\n", dictSize(evalScriptsDict()),
"number_of_functions:%lu\r\n", functionsNum(), "number_of_functions:%lu\r\n", functionsNum(),
"number_of_libraries:%lu\r\n", functionsLibNum(), "number_of_libraries:%lu\r\n", functionsLibNum(),
@ -5736,7 +5798,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
"used_memory_vm_total:%lld\r\n", memory_functions + memory_lua, "used_memory_vm_total:%lld\r\n", memory_functions + memory_lua,
"used_memory_vm_total_human:%s\r\n", used_memory_vm_total_hmem, "used_memory_vm_total_human:%s\r\n", used_memory_vm_total_hmem,
"used_memory_functions:%lld\r\n", (long long)mh->functions_caches, "used_memory_functions:%lld\r\n", (long long)mh->functions_caches,
"used_memory_scripts:%lld\r\n", (long long)mh->lua_caches + (long long)mh->functions_caches, "used_memory_scripts:%lld\r\n", (long long)mh->eval_caches + (long long)mh->functions_caches,
"used_memory_scripts_human:%s\r\n", used_memory_scripts_hmem, "used_memory_scripts_human:%s\r\n", used_memory_scripts_hmem,
"maxmemory:%lld\r\n", server.maxmemory, "maxmemory:%lld\r\n", server.maxmemory,
"maxmemory_human:%s\r\n", maxmemory_hmem, "maxmemory_human:%s\r\n", maxmemory_hmem,
@ -5861,9 +5923,29 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
} }
} }
/* Threads */
int stat_io_ops_processed_calculated = 0;
long long stat_io_reads_processed = 0, stat_io_writes_processed = 0;
long long stat_total_reads_processed = 0, stat_total_writes_processed = 0;
if (all_sections || (dictFind(section_dict,"threads") != NULL)) {
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info, "# Threads\r\n");
long long reads, writes;
for (j = 0; j < server.io_threads_num; j++) {
atomicGet(server.stat_io_reads_processed[j], reads);
atomicGet(server.stat_io_writes_processed[j], writes);
info = sdscatprintf(info, "io_thread_%d:clients=%d,reads=%lld,writes=%lld\r\n",
j, server.io_threads_clients_num[j], reads, writes);
stat_total_reads_processed += reads;
if (j != 0) stat_io_reads_processed += reads; /* Skip the main thread */
stat_total_writes_processed += writes;
if (j != 0) stat_io_writes_processed += writes; /* Skip the main thread */
}
stat_io_ops_processed_calculated = 1;
}
/* Stats */ /* Stats */
if (all_sections || (dictFind(section_dict,"stats") != NULL)) { if (all_sections || (dictFind(section_dict,"stats") != NULL)) {
long long stat_total_reads_processed, stat_total_writes_processed;
long long stat_net_input_bytes, stat_net_output_bytes; long long stat_net_input_bytes, stat_net_output_bytes;
long long stat_net_repl_input_bytes, stat_net_repl_output_bytes; long long stat_net_repl_input_bytes, stat_net_repl_output_bytes;
long long current_eviction_exceeded_time = server.stat_last_eviction_exceeded_time ? long long current_eviction_exceeded_time = server.stat_last_eviction_exceeded_time ?
@ -5871,14 +5953,26 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
long long current_active_defrag_time = server.stat_last_active_defrag_time ? long long current_active_defrag_time = server.stat_last_active_defrag_time ?
(long long) elapsedUs(server.stat_last_active_defrag_time): 0; (long long) elapsedUs(server.stat_last_active_defrag_time): 0;
long long stat_client_qbuf_limit_disconnections; long long stat_client_qbuf_limit_disconnections;
atomicGet(server.stat_total_reads_processed, stat_total_reads_processed);
atomicGet(server.stat_total_writes_processed, stat_total_writes_processed);
atomicGet(server.stat_net_input_bytes, stat_net_input_bytes); atomicGet(server.stat_net_input_bytes, stat_net_input_bytes);
atomicGet(server.stat_net_output_bytes, stat_net_output_bytes); atomicGet(server.stat_net_output_bytes, stat_net_output_bytes);
atomicGet(server.stat_net_repl_input_bytes, stat_net_repl_input_bytes); atomicGet(server.stat_net_repl_input_bytes, stat_net_repl_input_bytes);
atomicGet(server.stat_net_repl_output_bytes, stat_net_repl_output_bytes); atomicGet(server.stat_net_repl_output_bytes, stat_net_repl_output_bytes);
atomicGet(server.stat_client_qbuf_limit_disconnections, stat_client_qbuf_limit_disconnections); atomicGet(server.stat_client_qbuf_limit_disconnections, stat_client_qbuf_limit_disconnections);
/* If we calculated the total reads and writes in the threads section,
* we don't need to do it again, and also keep the values consistent. */
if (!stat_io_ops_processed_calculated) {
long long reads, writes;
for (j = 0; j < server.io_threads_num; j++) {
atomicGet(server.stat_io_reads_processed[j], reads);
stat_total_reads_processed += reads;
if (j != 0) stat_io_reads_processed += reads; /* Skip the main thread */
atomicGet(server.stat_io_writes_processed[j], writes);
stat_total_writes_processed += writes;
if (j != 0) stat_io_writes_processed += writes; /* Skip the main thread */
}
}
if (sections++) info = sdscat(info,"\r\n"); if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info, "# Stats\r\n" FMTARGS( info = sdscatprintf(info, "# Stats\r\n" FMTARGS(
"total_connections_received:%lld\r\n", server.stat_numconnections, "total_connections_received:%lld\r\n", server.stat_numconnections,
@ -5929,8 +6023,8 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
"dump_payload_sanitizations:%lld\r\n", server.stat_dump_payload_sanitizations, "dump_payload_sanitizations:%lld\r\n", server.stat_dump_payload_sanitizations,
"total_reads_processed:%lld\r\n", stat_total_reads_processed, "total_reads_processed:%lld\r\n", stat_total_reads_processed,
"total_writes_processed:%lld\r\n", stat_total_writes_processed, "total_writes_processed:%lld\r\n", stat_total_writes_processed,
"io_threaded_reads_processed:%lld\r\n", server.stat_io_reads_processed, "io_threaded_reads_processed:%lld\r\n", stat_io_reads_processed,
"io_threaded_writes_processed:%lld\r\n", server.stat_io_writes_processed, "io_threaded_writes_processed:%lld\r\n", stat_io_writes_processed,
"client_query_buffer_limit_disconnections:%lld\r\n", stat_client_qbuf_limit_disconnections, "client_query_buffer_limit_disconnections:%lld\r\n", stat_client_qbuf_limit_disconnections,
"client_output_buffer_limit_disconnections:%lld\r\n", server.stat_client_outbuf_limit_disconnections, "client_output_buffer_limit_disconnections:%lld\r\n", server.stat_client_outbuf_limit_disconnections,
"reply_buffer_shrinks:%lld\r\n", server.stat_reply_buffer_shrinks, "reply_buffer_shrinks:%lld\r\n", server.stat_reply_buffer_shrinks,
@ -5969,7 +6063,9 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
"master_last_io_seconds_ago:%d\r\n", server.master ? ((int)(server.unixtime-server.master->lastinteraction)) : -1, "master_last_io_seconds_ago:%d\r\n", server.master ? ((int)(server.unixtime-server.master->lastinteraction)) : -1,
"master_sync_in_progress:%d\r\n", server.repl_state == REPL_STATE_TRANSFER, "master_sync_in_progress:%d\r\n", server.repl_state == REPL_STATE_TRANSFER,
"slave_read_repl_offset:%lld\r\n", slave_read_repl_offset, "slave_read_repl_offset:%lld\r\n", slave_read_repl_offset,
"slave_repl_offset:%lld\r\n", slave_repl_offset)); "slave_repl_offset:%lld\r\n", slave_repl_offset,
"replica_full_sync_buffer_size:%zu\r\n", server.repl_full_sync_buffer.size,
"replica_full_sync_buffer_peak:%zu\r\n", server.repl_full_sync_buffer.peak));
if (server.repl_state == REPL_STATE_TRANSFER) { if (server.repl_state == REPL_STATE_TRANSFER) {
double perc = 0; double perc = 0;
@ -5998,7 +6094,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
info = sdscatprintf(info, info = sdscatprintf(info,
"connected_slaves:%lu\r\n", "connected_slaves:%lu\r\n",
listLength(server.slaves)); replicationLogicalReplicaCount());
/* If min-slaves-to-write is active, write the number of slaves /* If min-slaves-to-write is active, write the number of slaves
* currently considered 'good'. */ * currently considered 'good'. */
@ -6021,6 +6117,14 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
int port; int port;
long lag = 0; long lag = 0;
/* During rdbchannel replication, replica opens two connections.
* These are distinct slaves in server.slaves list from master
* POV. We don't want to list these separately. If a rdbchannel
* replica has an associated main-channel replica in
* server.slaves list, we'll list main channel replica only. */
if (replicationCheckHasMainChannel(slave))
continue;
if (!slaveip) { if (!slaveip) {
if (connAddrPeerName(slave->conn,ip,sizeof(ip),&port) == -1) if (connAddrPeerName(slave->conn,ip,sizeof(ip),&port) == -1)
continue; continue;
@ -6149,6 +6253,60 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
} }
} }
/* keysizes */
if (all_sections || (dictFind(section_dict,"keysizes") != NULL)) {
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info, "# Keysizes\r\n");
char *typestr[] = {
[OBJ_STRING] = "distrib_strings_sizes",
[OBJ_LIST] = "distrib_lists_items",
[OBJ_SET] = "distrib_sets_items",
[OBJ_ZSET] = "distrib_zsets_items",
[OBJ_HASH] = "distrib_hashes_items"
};
serverAssert(sizeof(typestr)/sizeof(typestr[0]) == OBJ_TYPE_BASIC_MAX);
for (int dbnum = 0; dbnum < server.dbnum; dbnum++) {
char *expSizeLabels[] = {
"1", "2", "4", "8", "16", "32", "64", "128", "256", "512", /* Byte */
"1K", "2K", "4K", "8K", "16K", "32K", "64K", "128K", "256K", "512K", /* Kilo */
"1M", "2M", "4M", "8M", "16M", "32M", "64M", "128M", "256M", "512M", /* Mega */
"1G", "2G", "4G", "8G", "16G", "32G", "64G", "128G", "256G", "512G", /* Giga */
"1T", "2T", "4T", "8T", "16T", "32T", "64T", "128T", "256T", "512T", /* Tera */
"1P", "2P", "4P", "8P", "16P", "32P", "64P", "128P", "256P", "512P", /* Peta */
"1E", "2E", "4E", "8E" /* Exa */
};
if (kvstoreSize(server.db[dbnum].keys) == 0)
continue;
for (int type = 0; type < OBJ_TYPE_BASIC_MAX; type++) {
uint64_t *kvstoreHist = kvstoreGetMetadata(server.db[dbnum].keys)->keysizes_hist[type];
char buf[10000];
int cnt = 0, buflen = 0;
/* Print histogram to temp buf[]. First bin is garbage */
buflen += snprintf(buf + buflen, sizeof(buf) - buflen, "db%d_%s:", dbnum, typestr[type]);
for (int i = 0; i < MAX_KEYSIZES_BINS; i++) {
if (kvstoreHist[i] == 0)
continue;
int res = snprintf(buf + buflen, sizeof(buf) - buflen,
(cnt == 0) ? "%s=%llu" : ",%s=%llu",
expSizeLabels[i], (unsigned long long) kvstoreHist[i]);
if (res < 0) break;
buflen += res;
cnt += kvstoreHist[i];
}
/* Print the temp buf[] to the info string */
if (cnt) info = sdscatprintf(info, "%s\r\n", buf);
}
}
}
/* Get info from modules. /* Get info from modules.
* Returned when the user asked for "everything", "modules", or a specific module section. * Returned when the user asked for "everything", "modules", or a specific module section.
* We're not aware of the module section names here, and we rather avoid the search when we can. * We're not aware of the module section names here, and we rather avoid the search when we can.

View File

@ -41,10 +41,6 @@
#include <systemd/sd-daemon.h> #include <systemd/sd-daemon.h>
#endif #endif
#ifndef static_assert
#define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 1:-1]
#endif
typedef long long mstime_t; /* millisecond time type. */ typedef long long mstime_t; /* millisecond time type. */
typedef long long ustime_t; /* microsecond time type. */ typedef long long ustime_t; /* microsecond time type. */
@ -65,6 +61,7 @@ typedef long long ustime_t; /* microsecond time type. */
N-elements flat arrays */ N-elements flat arrays */
#include "rax.h" /* Radix tree */ #include "rax.h" /* Radix tree */
#include "connection.h" /* Connection abstraction */ #include "connection.h" /* Connection abstraction */
#include "eventnotifier.h" /* Event notification */
#define REDISMODULE_CORE 1 #define REDISMODULE_CORE 1
typedef struct redisObject robj; typedef struct redisObject robj;
@ -95,6 +92,7 @@ struct hdr_histogram;
/* Error codes */ /* Error codes */
#define C_OK 0 #define C_OK 0
#define C_ERR -1 #define C_ERR -1
#define C_RETRY -2
/* Static server configuration */ /* Static server configuration */
#define CONFIG_DEFAULT_HZ 10 /* Time interrupt calls/sec. */ #define CONFIG_DEFAULT_HZ 10 /* Time interrupt calls/sec. */
@ -188,6 +186,14 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
/* Hash table parameters */ /* Hash table parameters */
#define HASHTABLE_MAX_LOAD_FACTOR 1.618 /* Maximum hash table load factor. */ #define HASHTABLE_MAX_LOAD_FACTOR 1.618 /* Maximum hash table load factor. */
/* Max number of IO threads */
#define IO_THREADS_MAX_NUM 128
/* Main thread id for doing IO work, whatever we enable or disable io thread
* the main thread always does IO work, so we can consider that the main thread
* is the io thread 0. */
#define IOTHREAD_MAIN_THREAD_ID 0
/* Command flags. Please check the definition of struct redisCommand in this file /* Command flags. Please check the definition of struct redisCommand in this file
* for more information about the meaning of every flag. */ * for more information about the meaning of every flag. */
#define CMD_WRITE (1ULL<<0) #define CMD_WRITE (1ULL<<0)
@ -278,6 +284,7 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
* out to all keys it should cover */ * out to all keys it should cover */
#define CMD_KEY_VARIABLE_FLAGS (1ULL<<10) /* Means that some keys might have #define CMD_KEY_VARIABLE_FLAGS (1ULL<<10) /* Means that some keys might have
* different flags depending on arguments */ * different flags depending on arguments */
#define CMD_KEY_PREFIX (1ULL<<11) /* Given key represents a prefix of a set of keys */
/* Key flags for when access type is unknown */ /* Key flags for when access type is unknown */
#define CMD_KEY_FULL_ACCESS (CMD_KEY_RW | CMD_KEY_ACCESS | CMD_KEY_UPDATE) #define CMD_KEY_FULL_ACCESS (CMD_KEY_RW | CMD_KEY_ACCESS | CMD_KEY_UPDATE)
@ -388,11 +395,34 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
#define CLIENT_MODULE_PREVENT_AOF_PROP (1ULL<<48) /* Module client do not want to propagate to AOF */ #define CLIENT_MODULE_PREVENT_AOF_PROP (1ULL<<48) /* Module client do not want to propagate to AOF */
#define CLIENT_MODULE_PREVENT_REPL_PROP (1ULL<<49) /* Module client do not want to propagate to replica */ #define CLIENT_MODULE_PREVENT_REPL_PROP (1ULL<<49) /* Module client do not want to propagate to replica */
#define CLIENT_REPROCESSING_COMMAND (1ULL<<50) /* The client is re-processing the command. */ #define CLIENT_REPROCESSING_COMMAND (1ULL<<50) /* The client is re-processing the command. */
#define CLIENT_REUSABLE_QUERYBUFFER (1ULL<<51) /* The client is using the reusable query buffer. */ #define CLIENT_REPL_RDB_CHANNEL (1ULL<<51) /* Client which is used for rdb delivery as part of rdb channel replication */
/* Any flag that does not let optimize FLUSH SYNC to run it in bg as blocking client ASYNC */ /* Any flag that does not let optimize FLUSH SYNC to run it in bg as blocking client ASYNC */
#define CLIENT_AVOID_BLOCKING_ASYNC_FLUSH (CLIENT_DENY_BLOCKING|CLIENT_MULTI|CLIENT_LUA_DEBUG|CLIENT_LUA_DEBUG_SYNC|CLIENT_MODULE) #define CLIENT_AVOID_BLOCKING_ASYNC_FLUSH (CLIENT_DENY_BLOCKING|CLIENT_MULTI|CLIENT_LUA_DEBUG|CLIENT_LUA_DEBUG_SYNC|CLIENT_MODULE)
/* Client flags for client IO */
#define CLIENT_IO_READ_ENABLED (1ULL<<0) /* Client can read from socket. */
#define CLIENT_IO_WRITE_ENABLED (1ULL<<1) /* Client can write to socket. */
#define CLIENT_IO_PENDING_COMMAND (1ULL<<2) /* Similar to CLIENT_PENDING_COMMAND. */
#define CLIENT_IO_REUSABLE_QUERYBUFFER (1ULL<<3) /* The client is using the reusable query buffer. */
#define CLIENT_IO_CLOSE_ASAP (1ULL<<4) /* Close this client ASAP in IO thread. */
/* Definitions for client read errors. These error codes are used to indicate
* various issues that can occur while reading or parsing data from a client. */
#define CLIENT_READ_TOO_BIG_INLINE_REQUEST 1
#define CLIENT_READ_UNBALANCED_QUOTES 2
#define CLIENT_READ_MASTER_USING_INLINE_PROTOCAL 3
#define CLIENT_READ_TOO_BIG_MBULK_COUNT_STRING 4
#define CLIENT_READ_TOO_BIG_BUCK_COUNT_STRING 5
#define CLIENT_READ_EXPECTED_DOLLAR 6
#define CLIENT_READ_INVALID_BUCK_LENGTH 7
#define CLIENT_READ_UNAUTH_BUCK_LENGTH 8
#define CLIENT_READ_INVALID_MULTIBUCK_LENGTH 9
#define CLIENT_READ_UNAUTH_MBUCK_COUNT 10
#define CLIENT_READ_CONN_DISCONNECTED 11
#define CLIENT_READ_CONN_CLOSED 12
#define CLIENT_READ_REACHED_MAX_QUERYBUF 13
/* Client block type (btype field in client structure) /* Client block type (btype field in client structure)
* if CLIENT_BLOCKED flag is set. */ * if CLIENT_BLOCKED flag is set. */
typedef enum blocking_type { typedef enum blocking_type {
@ -445,6 +475,24 @@ typedef enum {
REPL_STATE_CONNECTED, /* Connected to master */ REPL_STATE_CONNECTED, /* Connected to master */
} repl_state; } repl_state;
/* Replica rdb channel replication state. Used in server.repl_rdb_ch_state for
* replicas to remember what to do next. */
typedef enum {
REPL_RDB_CH_STATE_CLOSE_ASAP = -1, /* Async error state */
REPL_RDB_CH_STATE_NONE = 0, /* No active rdb channel sync */
REPL_RDB_CH_SEND_HANDSHAKE, /* Send handshake sequence to master */
REPL_RDB_CH_RECEIVE_AUTH_REPLY, /* Wait for AUTH reply */
REPL_RDB_CH_RECEIVE_REPLCONF_REPLY, /* Wait for REPLCONF reply */
REPL_RDB_CH_RECEIVE_FULLRESYNC, /* Wait for +FULLRESYNC reply */
REPL_RDB_CH_RDB_LOADING, /* Loading rdb using rdb channel */
} repl_rdb_channel_state;
/* Replication debug flags for testing. */
#define REPL_DEBUG_PAUSE_NONE (1 << 0)
#define REPL_DEBUG_AFTER_FORK (1 << 1)
#define REPL_DEBUG_BEFORE_RDB_CHANNEL (1 << 2)
#define REPL_DEBUG_ON_STREAMING_REPL_BUF (1 << 3)
/* The state of an in progress coordinated failover */ /* The state of an in progress coordinated failover */
typedef enum { typedef enum {
NO_FAILOVER = 0, /* No failover in progress */ NO_FAILOVER = 0, /* No failover in progress */
@ -463,16 +511,22 @@ typedef enum {
#define SLAVE_STATE_ONLINE 9 /* RDB file transmitted, sending just updates. */ #define SLAVE_STATE_ONLINE 9 /* RDB file transmitted, sending just updates. */
#define SLAVE_STATE_RDB_TRANSMITTED 10 /* RDB file transmitted - This state is used only for #define SLAVE_STATE_RDB_TRANSMITTED 10 /* RDB file transmitted - This state is used only for
* a replica that only wants RDB without replication buffer */ * a replica that only wants RDB without replication buffer */
#define SLAVE_STATE_WAIT_RDB_CHANNEL 11 /* Main channel of replica is connected,
* we are waiting rdbchannel connection to start delivery.*/
#define SLAVE_STATE_SEND_BULK_AND_STREAM 12 /* Main channel of a replica which uses rdb channel replication.
* Sending RDB file and replication stream in parallel. */
/* Slave capabilities. */ /* Slave capabilities. */
#define SLAVE_CAPA_NONE 0 #define SLAVE_CAPA_NONE 0
#define SLAVE_CAPA_EOF (1<<0) /* Can parse the RDB EOF streaming format. */ #define SLAVE_CAPA_EOF (1<<0) /* Can parse the RDB EOF streaming format. */
#define SLAVE_CAPA_PSYNC2 (1<<1) /* Supports PSYNC2 protocol. */ #define SLAVE_CAPA_PSYNC2 (1<<1) /* Supports PSYNC2 protocol. */
#define SLAVE_CAPA_RDB_CHANNEL_REPL (1<<2) /* Supports rdb channel replication during full sync */
/* Slave requirements */ /* Slave requirements */
#define SLAVE_REQ_NONE 0 #define SLAVE_REQ_NONE 0
#define SLAVE_REQ_RDB_EXCLUDE_DATA (1 << 0) /* Exclude data from RDB */ #define SLAVE_REQ_RDB_EXCLUDE_DATA (1 << 0) /* Exclude data from RDB */
#define SLAVE_REQ_RDB_EXCLUDE_FUNCTIONS (1 << 1) /* Exclude functions from RDB */ #define SLAVE_REQ_RDB_EXCLUDE_FUNCTIONS (1 << 1) /* Exclude functions from RDB */
#define SLAVE_REQ_RDB_CHANNEL (1 << 2) /* Use rdb channel replication */
/* Mask of all bits in the slave requirements bitfield that represent non-standard (filtered) RDB requirements */ /* Mask of all bits in the slave requirements bitfield that represent non-standard (filtered) RDB requirements */
#define SLAVE_REQ_RDB_MASK (SLAVE_REQ_RDB_EXCLUDE_DATA | SLAVE_REQ_RDB_EXCLUDE_FUNCTIONS) #define SLAVE_REQ_RDB_MASK (SLAVE_REQ_RDB_EXCLUDE_DATA | SLAVE_REQ_RDB_EXCLUDE_FUNCTIONS)
@ -581,6 +635,12 @@ typedef enum {
#define SHUTDOWN_NOW 4 /* Don't wait for replicas to catch up. */ #define SHUTDOWN_NOW 4 /* Don't wait for replicas to catch up. */
#define SHUTDOWN_FORCE 8 /* Don't let errors prevent shutdown. */ #define SHUTDOWN_FORCE 8 /* Don't let errors prevent shutdown. */
/* IO thread pause status */
#define IO_THREAD_UNPAUSED 0
#define IO_THREAD_PAUSING 1
#define IO_THREAD_PAUSED 2
#define IO_THREAD_RESUMING 3
/* Command call flags, see call() function */ /* Command call flags, see call() function */
#define CMD_CALL_NONE 0 #define CMD_CALL_NONE 0
#define CMD_CALL_PROPAGATE_AOF (1<<0) #define CMD_CALL_PROPAGATE_AOF (1<<0)
@ -698,6 +758,7 @@ typedef enum {
#define OBJ_SET 2 /* Set object. */ #define OBJ_SET 2 /* Set object. */
#define OBJ_ZSET 3 /* Sorted set object. */ #define OBJ_ZSET 3 /* Sorted set object. */
#define OBJ_HASH 4 /* Hash object. */ #define OBJ_HASH 4 /* Hash object. */
#define OBJ_TYPE_BASIC_MAX 5 /* Max number of basic object types. */
/* The "module" object type is a special one that signals that the object /* The "module" object type is a special one that signals that the object
* is one directly managed by a Redis module. In this case the value points * is one directly managed by a Redis module. In this case the value points
@ -969,7 +1030,7 @@ typedef struct replBufBlock {
* by integers from 0 (the default database) up to the max configured * by integers from 0 (the default database) up to the max configured
* database. The database number is the 'id' field in the structure. */ * database. The database number is the 'id' field in the structure. */
typedef struct redisDb { typedef struct redisDb {
kvstore *keys; /* The keyspace for this DB */ kvstore *keys; /* The keyspace for this DB. As metadata, holds keysizes histogram */
kvstore *expires; /* Timeout of keys with a timeout set */ kvstore *expires; /* Timeout of keys with a timeout set */
ebuckets hexpires; /* Hash expiration DS. Single TTL per hash (of next min field to expire) */ ebuckets hexpires; /* Hash expiration DS. Single TTL per hash (of next min field to expire) */
dict *blocking_keys; /* Keys with clients waiting for data (BLPOP)*/ dict *blocking_keys; /* Keys with clients waiting for data (BLPOP)*/
@ -1127,6 +1188,23 @@ typedef struct replBacklog {
* byte in the replication backlog buffer.*/ * byte in the replication backlog buffer.*/
} replBacklog; } replBacklog;
/* Used by replDataBuf during rdb channel replication to accumulate replication
* stream on replica side. */
typedef struct replDataBufBlock {
size_t used; /* Used bytes in the buf */
size_t size; /* Size of the buf */
char buf[]; /* Replication data */
} replDataBufBlock;
/* Linked list of replDataBufBlock structs, holds replication stream during
* rdb channel replication on replica side. */
typedef struct replDataBuf {
list *blocks; /* List of replDataBufBlock */
size_t size; /* Total number of bytes available in all blocks. */
size_t used; /* Total number of bytes actually used in all blocks. */
size_t peak; /* Peak number of bytes stored in all blocks. */
} replDataBuf;
typedef struct { typedef struct {
list *clients; list *clients;
size_t mem_usage_sum; size_t mem_usage_sum;
@ -1161,6 +1239,10 @@ typedef struct client {
uint64_t id; /* Client incremental unique ID. */ uint64_t id; /* Client incremental unique ID. */
uint64_t flags; /* Client flags: CLIENT_* macros. */ uint64_t flags; /* Client flags: CLIENT_* macros. */
connection *conn; connection *conn;
uint8_t tid; /* Thread assigned ID this client is bound to. */
uint8_t running_tid; /* Thread assigned ID this client is running on. */
uint8_t io_flags; /* Accessed by both main and IO threads, but not modified concurrently */
uint8_t read_error; /* Client read error: CLIENT_READ_* macros. */
int resp; /* RESP protocol version. Can be 2 or 3. */ int resp; /* RESP protocol version. Can be 2 or 3. */
redisDb *db; /* Pointer to currently SELECTed DB. */ redisDb *db; /* Pointer to currently SELECTed DB. */
robj *name; /* As set by CLIENT SETNAME. */ robj *name; /* As set by CLIENT SETNAME. */
@ -1176,6 +1258,7 @@ typedef struct client {
robj **original_argv; /* Arguments of original command if arguments were rewritten. */ robj **original_argv; /* Arguments of original command if arguments were rewritten. */
size_t argv_len_sum; /* Sum of lengths of objects in argv list. */ size_t argv_len_sum; /* Sum of lengths of objects in argv list. */
struct redisCommand *cmd, *lastcmd; /* Last command executed. */ struct redisCommand *cmd, *lastcmd; /* Last command executed. */
struct redisCommand *iolookedcmd; /* Command looked up in IO threads. */
struct redisCommand *realcmd; /* The original command that was executed by the client, struct redisCommand *realcmd; /* The original command that was executed by the client,
Used to update error stats in case the c->cmd was modified Used to update error stats in case the c->cmd was modified
during the command invocation (like on GEOADD for example). */ during the command invocation (like on GEOADD for example). */
@ -1218,6 +1301,7 @@ typedef struct client {
char *slave_addr; /* Optionally given by REPLCONF ip-address */ char *slave_addr; /* Optionally given by REPLCONF ip-address */
int slave_capa; /* Slave capabilities: SLAVE_CAPA_* bitwise OR. */ int slave_capa; /* Slave capabilities: SLAVE_CAPA_* bitwise OR. */
int slave_req; /* Slave requirements: SLAVE_REQ_* */ int slave_req; /* Slave requirements: SLAVE_REQ_* */
uint64_t main_ch_client_id; /* The client id of this replica's main channel */
multiState mstate; /* MULTI/EXEC state */ multiState mstate; /* MULTI/EXEC state */
blockingState bstate; /* blocking state */ blockingState bstate; /* blocking state */
long long woff; /* Last write global replication offset. */ long long woff; /* Last write global replication offset. */
@ -1228,8 +1312,8 @@ typedef struct client {
sds peerid; /* Cached peer ID. */ sds peerid; /* Cached peer ID. */
sds sockname; /* Cached connection target address. */ sds sockname; /* Cached connection target address. */
listNode *client_list_node; /* list node in client list */ listNode *client_list_node; /* list node in client list */
listNode *io_thread_client_list_node; /* list node in io thread client list */
listNode *postponed_list_node; /* list node within the postponed list */ listNode *postponed_list_node; /* list node within the postponed list */
listNode *pending_read_list_node; /* list node in clients pending read list */
void *module_blocked_client; /* Pointer to the RedisModuleBlockedClient associated with this void *module_blocked_client; /* Pointer to the RedisModuleBlockedClient associated with this
* client. This is set in case of module authentication before the * client. This is set in case of module authentication before the
* unblocked client is reprocessed to handle reply callbacks. */ * unblocked client is reprocessed to handle reply callbacks. */
@ -1282,6 +1366,20 @@ typedef struct client {
#endif #endif
} client; } client;
typedef struct __attribute__((aligned(CACHE_LINE_SIZE))) {
uint8_t id; /* The unique ID assigned, if IO_THREADS_MAX_NUM is more
* than 256, we should also promote the data type. */
pthread_t tid; /* Pthread ID */
redisAtomic int paused; /* Paused status for the io thread. */
aeEventLoop *el; /* Main event loop of io thread. */
list *pending_clients; /* List of clients with pending writes. */
list *processing_clients; /* List of clients being processed. */
eventNotifier *pending_clients_notifier; /* Used to wake up the loop when write should be performed. */
pthread_mutex_t pending_clients_mutex; /* Mutex for pending write list */
list *pending_clients_to_main_thread; /* Clients that are waiting to be executed by the main thread. */
list *clients; /* IO thread managed clients. */
} IOThread;
/* ACL information */ /* ACL information */
typedef struct aclInfo { typedef struct aclInfo {
long long user_auth_failures; /* Auth failure counts on user level */ long long user_auth_failures; /* Auth failure counts on user level */
@ -1404,8 +1502,9 @@ struct redisMemOverhead {
size_t clients_normal; size_t clients_normal;
size_t cluster_links; size_t cluster_links;
size_t aof_buffer; size_t aof_buffer;
size_t lua_caches; size_t eval_caches;
size_t functions_caches; size_t functions_caches;
size_t script_vm;
size_t overhead_total; size_t overhead_total;
size_t dataset; size_t dataset;
size_t total_keys; size_t total_keys;
@ -1569,6 +1668,7 @@ struct redisServer {
int errors_enabled; /* If true, errorstats is enabled, and we will add new errors. */ int errors_enabled; /* If true, errorstats is enabled, and we will add new errors. */
unsigned int lruclock; /* Clock for LRU eviction */ unsigned int lruclock; /* Clock for LRU eviction */
volatile sig_atomic_t shutdown_asap; /* Shutdown ordered by signal handler. */ volatile sig_atomic_t shutdown_asap; /* Shutdown ordered by signal handler. */
volatile sig_atomic_t crashing; /* Server is crashing report. */
mstime_t shutdown_mstime; /* Timestamp to limit graceful shutdown. */ mstime_t shutdown_mstime; /* Timestamp to limit graceful shutdown. */
int last_sig_received; /* Indicates the last SIGNAL received, if any (e.g., SIGINT or SIGTERM). */ int last_sig_received; /* Indicates the last SIGNAL received, if any (e.g., SIGINT or SIGTERM). */
int shutdown_flags; /* Flags passed to prepareForShutdown(). */ int shutdown_flags; /* Flags passed to prepareForShutdown(). */
@ -1592,7 +1692,7 @@ struct redisServer {
dict *moduleapi; /* Exported core APIs dictionary for modules. */ dict *moduleapi; /* Exported core APIs dictionary for modules. */
dict *sharedapi; /* Like moduleapi but containing the APIs that dict *sharedapi; /* Like moduleapi but containing the APIs that
modules share with each other. */ modules share with each other. */
dict *module_configs_queue; /* Dict that stores module configurations from .conf file until after modules are loaded during startup or arguments to loadex. */ dict *module_configs_queue; /* Unmapped configs are queued here, assumed to be module config. Applied after modules are loaded during startup or arguments to loadex. */
list *loadmodule_queue; /* List of modules to load at startup. */ list *loadmodule_queue; /* List of modules to load at startup. */
int module_pipe[2]; /* Pipe used to awake the event loop by module threads. */ int module_pipe[2]; /* Pipe used to awake the event loop by module threads. */
pid_t child_pid; /* PID of current child */ pid_t child_pid; /* PID of current child */
@ -1639,6 +1739,7 @@ struct redisServer {
redisAtomic uint64_t next_client_id; /* Next client unique ID. Incremental. */ redisAtomic uint64_t next_client_id; /* Next client unique ID. Incremental. */
int protected_mode; /* Don't accept external connections. */ int protected_mode; /* Don't accept external connections. */
int io_threads_num; /* Number of IO threads to use. */ int io_threads_num; /* Number of IO threads to use. */
int io_threads_clients_num[IO_THREADS_MAX_NUM]; /* Number of clients assigned to each IO thread. */
int io_threads_do_reads; /* Read and parse from IO threads? */ int io_threads_do_reads; /* Read and parse from IO threads? */
int io_threads_active; /* Is IO threads currently active? */ int io_threads_active; /* Is IO threads currently active? */
long long events_processed_while_blocked; /* processEventsWhileBlocked() */ long long events_processed_while_blocked; /* processEventsWhileBlocked() */
@ -1711,10 +1812,8 @@ struct redisServer {
long long stat_unexpected_error_replies; /* Number of unexpected (aof-loading, replica to master, etc.) error replies */ long long stat_unexpected_error_replies; /* Number of unexpected (aof-loading, replica to master, etc.) error replies */
long long stat_total_error_replies; /* Total number of issued error replies ( command + rejected errors ) */ long long stat_total_error_replies; /* Total number of issued error replies ( command + rejected errors ) */
long long stat_dump_payload_sanitizations; /* Number deep dump payloads integrity validations. */ long long stat_dump_payload_sanitizations; /* Number deep dump payloads integrity validations. */
long long stat_io_reads_processed; /* Number of read events processed by IO / Main threads */ redisAtomic long long stat_io_reads_processed[IO_THREADS_MAX_NUM]; /* Number of read events processed by IO / Main threads */
long long stat_io_writes_processed; /* Number of write events processed by IO / Main threads */ redisAtomic long long stat_io_writes_processed[IO_THREADS_MAX_NUM]; /* Number of write events processed by IO / Main threads */
redisAtomic long long stat_total_reads_processed; /* Total number of read events processed */
redisAtomic long long stat_total_writes_processed; /* Total number of write events processed */
redisAtomic long long stat_client_qbuf_limit_disconnections; /* Total number of clients reached query buf length limit */ redisAtomic long long stat_client_qbuf_limit_disconnections; /* Total number of clients reached query buf length limit */
long long stat_client_outbuf_limit_disconnections; /* Total number of clients reached output buf length limit */ long long stat_client_outbuf_limit_disconnections; /* Total number of clients reached output buf length limit */
/* The following two are used to track instantaneous metrics, like /* The following two are used to track instantaneous metrics, like
@ -1881,6 +1980,8 @@ struct redisServer {
int repl_ping_slave_period; /* Master pings the slave every N seconds */ int repl_ping_slave_period; /* Master pings the slave every N seconds */
replBacklog *repl_backlog; /* Replication backlog for partial syncs */ replBacklog *repl_backlog; /* Replication backlog for partial syncs */
long long repl_backlog_size; /* Backlog circular buffer size */ long long repl_backlog_size; /* Backlog circular buffer size */
long long repl_full_sync_buffer_limit; /* Accumulated repl data limit during rdb channel replication */
replDataBuf repl_full_sync_buffer; /* Accumulated replication data for rdb channel replication */
time_t repl_backlog_time_limit; /* Time without slaves after the backlog time_t repl_backlog_time_limit; /* Time without slaves after the backlog
gets released. */ gets released. */
time_t repl_no_slaves_since; /* We have no slaves since that time. time_t repl_no_slaves_since; /* We have no slaves since that time.
@ -1894,6 +1995,9 @@ struct redisServer {
int repl_diskless_sync_delay; /* Delay to start a diskless repl BGSAVE. */ int repl_diskless_sync_delay; /* Delay to start a diskless repl BGSAVE. */
int repl_diskless_sync_max_replicas;/* Max replicas for diskless repl BGSAVE int repl_diskless_sync_max_replicas;/* Max replicas for diskless repl BGSAVE
* delay (start sooner if they all connect). */ * delay (start sooner if they all connect). */
int repl_rdb_channel; /* Config used to determine if the replica should
* use rdb channel replication for full syncs. */
int repl_debug_pause; /* Debug config to force the main process to pause. */
size_t repl_buffer_mem; /* The memory of replication buffer. */ size_t repl_buffer_mem; /* The memory of replication buffer. */
list *repl_buffer_blocks; /* Replication buffers blocks list list *repl_buffer_blocks; /* Replication buffers blocks list
* (serving replica clients and repl backlog) */ * (serving replica clients and repl backlog) */
@ -1907,10 +2011,13 @@ struct redisServer {
client *cached_master; /* Cached master to be reused for PSYNC. */ client *cached_master; /* Cached master to be reused for PSYNC. */
int repl_syncio_timeout; /* Timeout for synchronous I/O calls */ int repl_syncio_timeout; /* Timeout for synchronous I/O calls */
int repl_state; /* Replication status if the instance is a slave */ int repl_state; /* Replication status if the instance is a slave */
int repl_rdb_ch_state; /* State of the replica's rdb channel during rdb channel replication */
uint64_t repl_main_ch_client_id; /* Main channel client id received in +RDBCHANNELSYNC reply. */
off_t repl_transfer_size; /* Size of RDB to read from master during sync. */ off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
off_t repl_transfer_read; /* Amount of RDB read from master during sync. */ off_t repl_transfer_read; /* Amount of RDB read from master during sync. */
off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */ off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */
connection *repl_transfer_s; /* Slave -> Master SYNC connection */ connection *repl_transfer_s; /* Slave -> Master SYNC connection */
connection *repl_rdb_transfer_s; /* Slave -> Master FULL SYNC connection (RDB download) */
int repl_transfer_fd; /* Slave -> Master SYNC temp file descriptor */ int repl_transfer_fd; /* Slave -> Master SYNC temp file descriptor */
char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */ char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */
time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */ time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */
@ -2462,11 +2569,6 @@ typedef struct {
#define OBJ_HASH_KEY 1 #define OBJ_HASH_KEY 1
#define OBJ_HASH_VALUE 2 #define OBJ_HASH_VALUE 2
#define IO_THREADS_OP_IDLE 0
#define IO_THREADS_OP_READ 1
#define IO_THREADS_OP_WRITE 2
extern int io_threads_op;
/* Hash-field data type (of t_hash.c) */ /* Hash-field data type (of t_hash.c) */
typedef mstr hfield; typedef mstr hfield;
extern mstrKind mstrFieldKind; extern mstrKind mstrFieldKind;
@ -2513,7 +2615,7 @@ void moduleInitModulesSystem(void);
void moduleInitModulesSystemLast(void); void moduleInitModulesSystemLast(void);
void modulesCron(void); void modulesCron(void);
int moduleLoad(const char *path, void **argv, int argc, int is_loadex); int moduleLoad(const char *path, void **argv, int argc, int is_loadex);
int moduleUnload(sds name, const char **errmsg); int moduleUnload(sds name, const char **errmsg, int forced_unload);
void moduleLoadFromQueue(void); void moduleLoadFromQueue(void);
int moduleGetCommandKeysViaAPI(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result); int moduleGetCommandKeysViaAPI(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
int moduleGetCommandChannelsViaAPI(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result); int moduleGetCommandChannelsViaAPI(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
@ -2681,9 +2783,6 @@ void whileBlockedCron(void);
void blockingOperationStarts(void); void blockingOperationStarts(void);
void blockingOperationEnds(void); void blockingOperationEnds(void);
int handleClientsWithPendingWrites(void); int handleClientsWithPendingWrites(void);
int handleClientsWithPendingWritesUsingThreads(void);
int handleClientsWithPendingReadsUsingThreads(void);
int stopThreadedIOIfNeeded(void);
int clientHasPendingReplies(client *c); int clientHasPendingReplies(client *c);
int updateClientMemUsageAndBucket(client *c); int updateClientMemUsageAndBucket(client *c);
void removeClientFromMemUsageBucket(client *c, int allow_eviction); void removeClientFromMemUsageBucket(client *c, int allow_eviction);
@ -2692,13 +2791,32 @@ int writeToClient(client *c, int handler_installed);
void linkClient(client *c); void linkClient(client *c);
void protectClient(client *c); void protectClient(client *c);
void unprotectClient(client *c); void unprotectClient(client *c);
void initThreadedIO(void);
client *lookupClientByID(uint64_t id); client *lookupClientByID(uint64_t id);
int authRequired(client *c); int authRequired(client *c);
void putClientInPendingWriteQueue(client *c); void putClientInPendingWriteQueue(client *c);
/* reply macros */ /* reply macros */
#define ADD_REPLY_BULK_CBUFFER_STRING_CONSTANT(c, str) addReplyBulkCBuffer(c, str, strlen(str)) #define ADD_REPLY_BULK_CBUFFER_STRING_CONSTANT(c, str) addReplyBulkCBuffer(c, str, strlen(str))
/* iothread.c - the threaded io implementation */
void initThreadedIO(void);
void killIOThreads(void);
void pauseIOThread(int id);
void resumeIOThread(int id);
void pauseAllIOThreads(void);
void resumeAllIOThreads(void);
void pauseIOThreadsRange(int start, int end);
void resumeIOThreadsRange(int start, int end);
int resizeAllIOThreadsEventLoops(size_t newsize);
int sendPendingClientsToIOThreads(void);
void enqueuePendingClientsToMainThread(client *c, int unbind);
void putInPendingClienstForIOThreads(client *c);
void handleClientReadError(client *c);
void unbindClientFromIOThreadEventLoop(client *c);
void processClientsOfAllIOThreads(void);
void assignClientToIOThread(client *c);
void fetchClientFromIOThread(client *c);
int isClientMustHandledByMainThread(client *c);
/* logreqres.c - logging of requests and responses */ /* logreqres.c - logging of requests and responses */
void reqresReset(client *c, int free_buf); void reqresReset(client *c, int free_buf);
void reqresSaveClientReplyOffset(client *c); void reqresSaveClientReplyOffset(client *c);
@ -2799,6 +2917,7 @@ int isSdsRepresentableAsLongLong(sds s, long long *llval);
int isObjectRepresentableAsLongLong(robj *o, long long *llongval); int isObjectRepresentableAsLongLong(robj *o, long long *llongval);
robj *tryObjectEncoding(robj *o); robj *tryObjectEncoding(robj *o);
robj *tryObjectEncodingEx(robj *o, int try_trim); robj *tryObjectEncodingEx(robj *o, int try_trim);
size_t getObjectLength(robj *o);
robj *getDecodedObject(robj *o); robj *getDecodedObject(robj *o);
size_t stringObjectLen(robj *o); size_t stringObjectLen(robj *o);
robj *createStringObjectFromLongLong(long long value); robj *createStringObjectFromLongLong(long long value);
@ -2881,6 +3000,8 @@ void clearFailoverState(void);
void updateFailoverStatus(void); void updateFailoverStatus(void);
void abortFailover(const char *err); void abortFailover(const char *err);
const char *getFailoverStateString(void); const char *getFailoverStateString(void);
int replicationCheckHasMainChannel(client *slave);
unsigned long replicationLogicalReplicaCount(void);
/* Generic persistence functions */ /* Generic persistence functions */
void startLoadingFile(size_t size, char* filename, int rdbflags); void startLoadingFile(size_t size, char* filename, int rdbflags);
@ -3242,7 +3363,8 @@ void hashTypeCurrentObject(hashTypeIterator *hi, int what, unsigned char **vstr,
unsigned int *vlen, long long *vll, uint64_t *expireTime); unsigned int *vlen, long long *vll, uint64_t *expireTime);
sds hashTypeCurrentObjectNewSds(hashTypeIterator *hi, int what); sds hashTypeCurrentObjectNewSds(hashTypeIterator *hi, int what);
hfield hashTypeCurrentObjectNewHfield(hashTypeIterator *hi); hfield hashTypeCurrentObjectNewHfield(hashTypeIterator *hi);
robj *hashTypeGetValueObject(redisDb *db, robj *o, sds field, int hfeFlags, int *isHashDeleted); int hashTypeGetValueObject(redisDb *db, robj *o, sds field, int hfeFlags,
robj **val, uint64_t *expireTime, int *isHashDeleted);
int hashTypeSet(redisDb *db, robj *o, sds field, sds value, int flags); int hashTypeSet(redisDb *db, robj *o, sds field, sds value, int flags);
robj *hashTypeDup(robj *o, sds newkey, uint64_t *minHashExpire); robj *hashTypeDup(robj *o, sds newkey, uint64_t *minHashExpire);
uint64_t hashTypeRemoveFromExpires(ebuckets *hexpires, robj *o); uint64_t hashTypeRemoveFromExpires(ebuckets *hexpires, robj *o);
@ -3347,10 +3469,10 @@ void freeServerClientMemUsageBuckets(void);
typedef struct ModuleConfig ModuleConfig; typedef struct ModuleConfig ModuleConfig;
int performModuleConfigSetFromName(sds name, sds value, const char **err); int performModuleConfigSetFromName(sds name, sds value, const char **err);
int performModuleConfigSetDefaultFromName(sds name, const char **err); int performModuleConfigSetDefaultFromName(sds name, const char **err);
void addModuleBoolConfig(const char *module_name, const char *name, int flags, void *privdata, int default_val); void addModuleBoolConfig(sds name, sds alias, int flags, void *privdata, int default_val);
void addModuleStringConfig(const char *module_name, const char *name, int flags, void *privdata, sds default_val); void addModuleStringConfig(sds name, sds alias, int flags, void *privdata, sds default_val);
void addModuleEnumConfig(const char *module_name, const char *name, int flags, void *privdata, int default_val, configEnum *enum_vals); void addModuleEnumConfig(sds name, sds alias, int flags, void *privdata, int default_val, configEnum *enum_vals, int num_enum_vals);
void addModuleNumericConfig(const char *module_name, const char *name, int flags, void *privdata, long long default_val, int conf_flags, long long lower, long long upper); void addModuleNumericConfig(sds name, sds alias, int flags, void *privdata, long long default_val, int conf_flags, long long lower, long long upper);
void addModuleConfigApply(list *module_configs, ModuleConfig *module_config); void addModuleConfigApply(list *module_configs, ModuleConfig *module_config);
int moduleConfigApplyConfig(list *module_configs, const char **err, const char **err_arg_name); int moduleConfigApplyConfig(list *module_configs, const char **err, const char **err_arg_name);
int getModuleBoolConfig(ModuleConfig *module_config); int getModuleBoolConfig(ModuleConfig *module_config);
@ -3363,6 +3485,7 @@ long long getModuleNumericConfig(ModuleConfig *module_config);
int setModuleNumericConfig(ModuleConfig *config, long long val, const char **err); int setModuleNumericConfig(ModuleConfig *config, long long val, const char **err);
/* db.c -- Keyspace access API */ /* db.c -- Keyspace access API */
void updateKeysizesHist(redisDb *db, int didx, uint32_t type, uint64_t oldLen, uint64_t newLen);
int removeExpire(redisDb *db, robj *key); int removeExpire(redisDb *db, robj *key);
void deleteExpiredKeyAndPropagate(redisDb *db, robj *keyobj); void deleteExpiredKeyAndPropagate(redisDb *db, robj *keyobj);
void deleteEvictedKeyAndPropagate(redisDb *db, robj *keyobj, long long *key_mem_freed); void deleteEvictedKeyAndPropagate(redisDb *db, robj *keyobj, long long *key_mem_freed);
@ -3511,9 +3634,9 @@ int ldbIsEnabled(void);
void ldbLog(sds entry); void ldbLog(sds entry);
void ldbLogRedisReply(char *reply); void ldbLogRedisReply(char *reply);
void sha1hex(char *digest, char *script, size_t len); void sha1hex(char *digest, char *script, size_t len);
unsigned long evalMemory(void); unsigned long evalScriptsMemoryVM(void);
dict* evalScriptsDict(void); dict* evalScriptsDict(void);
unsigned long evalScriptsMemory(void); unsigned long evalScriptsMemoryEngine(void);
uint64_t evalGetCommandFlags(client *c, uint64_t orig_flags); uint64_t evalGetCommandFlags(client *c, uint64_t orig_flags);
uint64_t fcallGetCommandFlags(client *c, uint64_t orig_flags); uint64_t fcallGetCommandFlags(client *c, uint64_t orig_flags);
int isInsideYieldingLongCommand(void); int isInsideYieldingLongCommand(void);
@ -3775,6 +3898,7 @@ void configGetCommand(client *c);
void configResetStatCommand(client *c); void configResetStatCommand(client *c);
void configRewriteCommand(client *c); void configRewriteCommand(client *c);
void configHelpCommand(client *c); void configHelpCommand(client *c);
int configExists(const sds name);
void hincrbyCommand(client *c); void hincrbyCommand(client *c);
void hincrbyfloatCommand(client *c); void hincrbyfloatCommand(client *c);
void subscribeCommand(client *c); void subscribeCommand(client *c);
@ -3898,11 +4022,11 @@ void xorDigest(unsigned char *digest, const void *ptr, size_t len);
sds catSubCommandFullname(const char *parent_name, const char *sub_name); sds catSubCommandFullname(const char *parent_name, const char *sub_name);
void commandAddSubcommand(struct redisCommand *parent, struct redisCommand *subcommand, const char *declared_name); void commandAddSubcommand(struct redisCommand *parent, struct redisCommand *subcommand, const char *declared_name);
void debugDelay(int usec); void debugDelay(int usec);
void killIOThreads(void);
void killThreads(void); void killThreads(void);
void makeThreadKillable(void); void makeThreadKillable(void);
void swapMainDbWithTempDb(redisDb *tempDb); void swapMainDbWithTempDb(redisDb *tempDb);
sds getVersion(void); sds getVersion(void);
void debugPauseProcess(void);
/* Use macro for checking log level to avoid evaluating arguments in cases log /* Use macro for checking log level to avoid evaluating arguments in cases log
* should be ignored due to low level. */ * should be ignored due to low level. */

View File

@ -53,11 +53,12 @@ static ConnectionType CT_Socket;
* be embedded in different structs, not just client. * be embedded in different structs, not just client.
*/ */
static connection *connCreateSocket(void) { static connection *connCreateSocket(struct aeEventLoop *el) {
connection *conn = zcalloc(sizeof(connection)); connection *conn = zcalloc(sizeof(connection));
conn->type = &CT_Socket; conn->type = &CT_Socket;
conn->fd = -1; conn->fd = -1;
conn->iovcnt = IOV_MAX; conn->iovcnt = IOV_MAX;
conn->el = el;
return conn; return conn;
} }
@ -72,9 +73,9 @@ static connection *connCreateSocket(void) {
* is not in an error state (which is not possible for a socket connection, * is not in an error state (which is not possible for a socket connection,
* but could but possible with other protocols). * but could but possible with other protocols).
*/ */
static connection *connCreateAcceptedSocket(int fd, void *priv) { static connection *connCreateAcceptedSocket(struct aeEventLoop *el, int fd, void *priv) {
UNUSED(priv); UNUSED(priv);
connection *conn = connCreateSocket(); connection *conn = connCreateSocket(el);
conn->fd = fd; conn->fd = fd;
conn->state = CONN_STATE_ACCEPTING; conn->state = CONN_STATE_ACCEPTING;
return conn; return conn;
@ -93,7 +94,7 @@ static int connSocketConnect(connection *conn, const char *addr, int port, const
conn->state = CONN_STATE_CONNECTING; conn->state = CONN_STATE_CONNECTING;
conn->conn_handler = connect_handler; conn->conn_handler = connect_handler;
aeCreateFileEvent(server.el, conn->fd, AE_WRITABLE, aeCreateFileEvent(conn->el, conn->fd, AE_WRITABLE,
conn->type->ae_handler, conn); conn->type->ae_handler, conn);
return C_OK; return C_OK;
@ -114,7 +115,7 @@ static void connSocketShutdown(connection *conn) {
/* Close the connection and free resources. */ /* Close the connection and free resources. */
static void connSocketClose(connection *conn) { static void connSocketClose(connection *conn) {
if (conn->fd != -1) { if (conn->fd != -1) {
aeDeleteFileEvent(server.el,conn->fd, AE_READABLE | AE_WRITABLE); if (conn->el) aeDeleteFileEvent(conn->el, conn->fd, AE_READABLE | AE_WRITABLE);
close(conn->fd); close(conn->fd);
conn->fd = -1; conn->fd = -1;
} }
@ -190,6 +191,15 @@ static int connSocketAccept(connection *conn, ConnectionCallbackFunc accept_hand
return ret; return ret;
} }
/* Rebind the connection to another event loop, read/write handlers must not
* be installed in the current event loop, otherwise it will cause two event
* loops to manage the same connection at the same time. */
static int connSocketRebindEventLoop(connection *conn, aeEventLoop *el) {
serverAssert(!conn->el && !conn->read_handler && !conn->write_handler);
conn->el = el;
return C_OK;
}
/* Register a write handler, to be called when the connection is writable. /* Register a write handler, to be called when the connection is writable.
* If NULL, the existing handler is removed. * If NULL, the existing handler is removed.
* *
@ -207,9 +217,9 @@ static int connSocketSetWriteHandler(connection *conn, ConnectionCallbackFunc fu
else else
conn->flags &= ~CONN_FLAG_WRITE_BARRIER; conn->flags &= ~CONN_FLAG_WRITE_BARRIER;
if (!conn->write_handler) if (!conn->write_handler)
aeDeleteFileEvent(server.el,conn->fd,AE_WRITABLE); aeDeleteFileEvent(conn->el,conn->fd,AE_WRITABLE);
else else
if (aeCreateFileEvent(server.el,conn->fd,AE_WRITABLE, if (aeCreateFileEvent(conn->el,conn->fd,AE_WRITABLE,
conn->type->ae_handler,conn) == AE_ERR) return C_ERR; conn->type->ae_handler,conn) == AE_ERR) return C_ERR;
return C_OK; return C_OK;
} }
@ -222,9 +232,9 @@ static int connSocketSetReadHandler(connection *conn, ConnectionCallbackFunc fun
conn->read_handler = func; conn->read_handler = func;
if (!conn->read_handler) if (!conn->read_handler)
aeDeleteFileEvent(server.el,conn->fd,AE_READABLE); aeDeleteFileEvent(conn->el,conn->fd,AE_READABLE);
else else
if (aeCreateFileEvent(server.el,conn->fd, if (aeCreateFileEvent(conn->el,conn->fd,
AE_READABLE,conn->type->ae_handler,conn) == AE_ERR) return C_ERR; AE_READABLE,conn->type->ae_handler,conn) == AE_ERR) return C_ERR;
return C_OK; return C_OK;
} }
@ -250,7 +260,7 @@ static void connSocketEventHandler(struct aeEventLoop *el, int fd, void *clientD
conn->state = CONN_STATE_CONNECTED; conn->state = CONN_STATE_CONNECTED;
} }
if (!conn->write_handler) aeDeleteFileEvent(server.el,conn->fd,AE_WRITABLE); if (!conn->write_handler) aeDeleteFileEvent(conn->el, conn->fd, AE_WRITABLE);
if (!callHandler(conn, conn->conn_handler)) return; if (!callHandler(conn, conn->conn_handler)) return;
conn->conn_handler = NULL; conn->conn_handler = NULL;
@ -291,7 +301,6 @@ static void connSocketAcceptHandler(aeEventLoop *el, int fd, void *privdata, int
int cport, cfd; int cport, cfd;
int max = server.max_new_conns_per_cycle; int max = server.max_new_conns_per_cycle;
char cip[NET_IP_STR_LEN]; char cip[NET_IP_STR_LEN];
UNUSED(el);
UNUSED(mask); UNUSED(mask);
UNUSED(privdata); UNUSED(privdata);
@ -304,7 +313,7 @@ static void connSocketAcceptHandler(aeEventLoop *el, int fd, void *privdata, int
return; return;
} }
serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport); serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
acceptCommonHandler(connCreateAcceptedSocket(cfd, NULL),0,cip); acceptCommonHandler(connCreateAcceptedSocket(el,cfd,NULL), 0, cip);
} }
} }
@ -397,6 +406,10 @@ static ConnectionType CT_Socket = {
.blocking_connect = connSocketBlockingConnect, .blocking_connect = connSocketBlockingConnect,
.accept = connSocketAccept, .accept = connSocketAccept,
/* event loop */
.unbind_event_loop = NULL,
.rebind_event_loop = connSocketRebindEventLoop,
/* IO */ /* IO */
.write = connSocketWrite, .write = connSocketWrite,
.writev = connSocketWritev, .writev = connSocketWritev,

View File

@ -41,7 +41,7 @@ redisSortOperation *createSortOperation(int type, robj *pattern) {
robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) { robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
char *p, *f, *k; char *p, *f, *k;
sds spat, ssub; sds spat, ssub;
robj *keyobj, *fieldobj = NULL, *o; robj *keyobj, *fieldobj = NULL, *o, *val;
int prefixlen, sublen, postfixlen, fieldlen; int prefixlen, sublen, postfixlen, fieldlen;
/* If the pattern is "#" return the substitution object itself in order /* If the pattern is "#" return the substitution object itself in order
@ -95,7 +95,8 @@ robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
/* Retrieve value from hash by the field name. The returned object /* Retrieve value from hash by the field name. The returned object
* is a new object with refcount already incremented. */ * is a new object with refcount already incremented. */
int isHashDeleted; int isHashDeleted;
o = hashTypeGetValueObject(db, o, fieldobj->ptr, HFE_LAZY_EXPIRE, &isHashDeleted); hashTypeGetValueObject(db, o, fieldobj->ptr, HFE_LAZY_EXPIRE, &val, NULL, &isHashDeleted);
o = val;
if (isHashDeleted) if (isHashDeleted)
goto noobj; goto noobj;

View File

@ -422,8 +422,13 @@ void listpackExExpire(redisDb *db, robj *o, ExpireInfo *info) {
expired++; expired++;
} }
if (expired) if (expired) {
lpt->lp = lpDeleteRange(lpt->lp, 0, expired * 3); lpt->lp = lpDeleteRange(lpt->lp, 0, expired * 3);
/* update keysizes */
unsigned long l = lpLength(lpt->lp) / 3;
updateKeysizesHist(db, getKeySlot(lpt->key), OBJ_HASH, l + expired, l);
}
min = hashTypeGetMinExpire(o, 1 /*accurate*/); min = hashTypeGetMinExpire(o, 1 /*accurate*/);
info->nextExpireTime = min; info->nextExpireTime = min;
@ -546,6 +551,11 @@ SetExRes hashTypeSetExpiryListpack(HashTypeSetEx *ex, sds field,
if (unlikely(checkAlreadyExpired(expireAt))) { if (unlikely(checkAlreadyExpired(expireAt))) {
propagateHashFieldDeletion(ex->db, ex->key->ptr, field, sdslen(field)); propagateHashFieldDeletion(ex->db, ex->key->ptr, field, sdslen(field));
hashTypeDelete(ex->hashObj, field, 1); hashTypeDelete(ex->hashObj, field, 1);
/* get listpack length */
listpackEx *lpt = ((listpackEx *) ex->hashObj->ptr);
unsigned long length = lpLength(lpt->lp) / 3;
updateKeysizesHist(ex->db, getKeySlot(ex->key->ptr), OBJ_HASH, length+1, length);
server.stat_expired_subkeys++; server.stat_expired_subkeys++;
ex->fieldDeleted++; ex->fieldDeleted++;
return HSETEX_DELETED; return HSETEX_DELETED;
@ -706,24 +716,28 @@ GetFieldRes hashTypeGetFromHashTable(robj *o, sds field, sds *value, uint64_t *e
* If *vll is populated *vstr is set to NULL, so the caller can * If *vll is populated *vstr is set to NULL, so the caller can
* always check the function return by checking the return value * always check the function return by checking the return value
* for GETF_OK and checking if vll (or vstr) is NULL. * for GETF_OK and checking if vll (or vstr) is NULL.
* * expiredAt - if the field has an expiration time, it will be set to the expiration
* time of the field. Otherwise, will be set to EB_EXPIRE_TIME_INVALID.
*/ */
GetFieldRes hashTypeGetValue(redisDb *db, robj *o, sds field, unsigned char **vstr, GetFieldRes hashTypeGetValue(redisDb *db, robj *o, sds field, unsigned char **vstr,
unsigned int *vlen, long long *vll, int hfeFlags) { unsigned int *vlen, long long *vll,
uint64_t expiredAt; int hfeFlags, uint64_t *expiredAt)
{
sds key; sds key;
GetFieldRes res; GetFieldRes res;
uint64_t dummy;
if (expiredAt == NULL) expiredAt = &dummy;
if (o->encoding == OBJ_ENCODING_LISTPACK || if (o->encoding == OBJ_ENCODING_LISTPACK ||
o->encoding == OBJ_ENCODING_LISTPACK_EX) { o->encoding == OBJ_ENCODING_LISTPACK_EX) {
*vstr = NULL; *vstr = NULL;
res = hashTypeGetFromListpack(o, field, vstr, vlen, vll, &expiredAt); res = hashTypeGetFromListpack(o, field, vstr, vlen, vll, expiredAt);
if (res == GETF_NOT_FOUND) if (res == GETF_NOT_FOUND)
return GETF_NOT_FOUND; return GETF_NOT_FOUND;
} else if (o->encoding == OBJ_ENCODING_HT) { } else if (o->encoding == OBJ_ENCODING_HT) {
sds value = NULL; sds value = NULL;
res = hashTypeGetFromHashTable(o, field, &value, &expiredAt); res = hashTypeGetFromHashTable(o, field, &value, expiredAt);
if (res == GETF_NOT_FOUND) if (res == GETF_NOT_FOUND)
return GETF_NOT_FOUND; return GETF_NOT_FOUND;
@ -734,7 +748,8 @@ GetFieldRes hashTypeGetValue(redisDb *db, robj *o, sds field, unsigned char **vs
serverPanic("Unknown hash encoding"); serverPanic("Unknown hash encoding");
} }
if ((expiredAt >= (uint64_t) commandTimeSnapshot()) || (hfeFlags & HFE_LAZY_ACCESS_EXPIRED)) if ((*expiredAt >= (uint64_t) commandTimeSnapshot()) ||
(hfeFlags & HFE_LAZY_ACCESS_EXPIRED))
return GETF_OK; return GETF_OK;
if (server.masterhost) { if (server.masterhost) {
@ -787,29 +802,46 @@ GetFieldRes hashTypeGetValue(redisDb *db, robj *o, sds field, unsigned char **vs
* isHashDeleted - If attempted to access expired field and it's the last field * isHashDeleted - If attempted to access expired field and it's the last field
* in the hash, then the hash will as well be deleted. In this case, * in the hash, then the hash will as well be deleted. In this case,
* isHashDeleted will be set to 1. * isHashDeleted will be set to 1.
* val - If the field is found, then val will be set to the value object.
* expireTime - If the field exists (`GETF_OK`) then expireTime will be set to
* the expiration time of the field. Otherwise, it will be set to 0.
*
* Returns 1 if the field exists, and 0 when it doesn't.
*/ */
robj *hashTypeGetValueObject(redisDb *db, robj *o, sds field, int hfeFlags, int *isHashDeleted) { int hashTypeGetValueObject(redisDb *db, robj *o, sds field, int hfeFlags,
robj **val, uint64_t *expireTime, int *isHashDeleted) {
unsigned char *vstr; unsigned char *vstr;
unsigned int vlen; unsigned int vlen;
long long vll; long long vll;
if (isHashDeleted) *isHashDeleted = 0; if (isHashDeleted) *isHashDeleted = 0;
GetFieldRes res = hashTypeGetValue(db,o,field,&vstr,&vlen,&vll, hfeFlags); if (val) *val = NULL;
GetFieldRes res = hashTypeGetValue(db,o,field,&vstr,&vlen,&vll,
hfeFlags, expireTime);
if (res == GETF_OK) { if (res == GETF_OK) {
if (vstr) return createStringObject((char*)vstr,vlen); /* expireTime set to 0 if the field has no expiration time */
else return createStringObjectFromLongLong(vll); if (expireTime && (*expireTime == EB_EXPIRE_TIME_INVALID))
*expireTime = 0;
/* If expected to return the value, then create a new object */
if (val) {
if (vstr) *val = createStringObject((char *) vstr, vlen);
else *val = createStringObjectFromLongLong(vll);
}
return 1;
} }
if ((res == GETF_EXPIRED_HASH) && (isHashDeleted)) if ((res == GETF_EXPIRED_HASH) && (isHashDeleted))
*isHashDeleted = 1; *isHashDeleted = 1;
/* GETF_EXPIRED_HASH, GETF_EXPIRED, GETF_NOT_FOUND */ /* GETF_EXPIRED_HASH, GETF_EXPIRED, GETF_NOT_FOUND */
return NULL; return 0;
} }
/* Test if the specified field exists in the given hash. If the field is /* Test if the specified field exists in the given hash. If the field is
* expired (HFE), then it will be lazy deleted * expired (HFE), then it will be lazy deleted unless HFE_LAZY_AVOID_FIELD_DEL
* hfeFlags is set.
* *
* hfeFlags - Lookup HFE_LAZY_* flags * hfeFlags - Lookup HFE_LAZY_* flags
* isHashDeleted - If attempted to access expired field and it is the last field * isHashDeleted - If attempted to access expired field and it is the last field
@ -823,7 +855,8 @@ int hashTypeExists(redisDb *db, robj *o, sds field, int hfeFlags, int *isHashDel
unsigned int vlen = UINT_MAX; unsigned int vlen = UINT_MAX;
long long vll = LLONG_MAX; long long vll = LLONG_MAX;
GetFieldRes res = hashTypeGetValue(db, o, field, &vstr, &vlen, &vll, hfeFlags); GetFieldRes res = hashTypeGetValue(db, o, field, &vstr, &vlen, &vll,
hfeFlags, NULL);
if (isHashDeleted) if (isHashDeleted)
*isHashDeleted = (res == GETF_EXPIRED_HASH) ? 1 : 0; *isHashDeleted = (res == GETF_EXPIRED_HASH) ? 1 : 0;
return (res == GETF_OK) ? 1 : 0; return (res == GETF_OK) ? 1 : 0;
@ -933,27 +966,25 @@ int hashTypeSet(redisDb *db, robj *o, sds field, sds value, int flags) {
hashTypeConvert(o, OBJ_ENCODING_HT, &db->hexpires); hashTypeConvert(o, OBJ_ENCODING_HT, &db->hexpires);
} else if (o->encoding == OBJ_ENCODING_HT) { } else if (o->encoding == OBJ_ENCODING_HT) {
hfield newField = hfieldNew(field, sdslen(field), 0);
dict *ht = o->ptr; dict *ht = o->ptr;
dictEntry *de, *existing; dictEntry *de, *existing;
const uint64_t hash = dictGetHash(ht,field);
/* stored key is different than lookup key */ /* check if field already exists */
dictUseStoredKeyApi(ht, 1); existing = dictFindByHash(ht, field, hash);
de = dictAddRaw(ht, newField, &existing); /* check if field already exists */
dictUseStoredKeyApi(ht, 0); if (existing == NULL) {
hfield newField = hfieldNew(field, sdslen(field), 0);
/* If field already exists, then update "field". "Value" will be set afterward */ dictUseStoredKeyApi(ht, 1);
if (de == NULL) { de = dictAddNonExistsByHash(ht, newField, hash);
if (flags & HASH_SET_KEEP_TTL) { dictUseStoredKeyApi(ht, 0);
/* keep old field along with TTL */ } else {
hfieldFree(newField); /* If attached TTL to the old field, then remove it from hash's
} else { * private ebuckets when HASH_SET_KEEP_TTL is not set. */
/* If attached TTL to the old field, then remove it from hash's private ebuckets */ if (!(flags & HASH_SET_KEEP_TTL)) {
hfield oldField = dictGetKey(existing); hfield oldField = dictGetKey(existing);
hfieldPersist(o, oldField); hfieldPersist(o, oldField);
hfieldFree(oldField);
dictSetKey(ht, existing, newField);
} }
/* Free the old value */
sdsfree(dictGetVal(existing)); sdsfree(dictGetVal(existing));
update = 1; update = 1;
de = existing; de = existing;
@ -1042,6 +1073,8 @@ SetExRes hashTypeSetExpiryHT(HashTypeSetEx *exInfo, sds field, uint64_t expireAt
/* If expired, then delete the field and propagate the deletion. /* If expired, then delete the field and propagate the deletion.
* If replica, continue like the field is valid */ * If replica, continue like the field is valid */
if (unlikely(checkAlreadyExpired(expireAt))) { if (unlikely(checkAlreadyExpired(expireAt))) {
unsigned long length = dictSize(ht);
updateKeysizesHist(exInfo->db, getKeySlot(exInfo->key->ptr), OBJ_HASH, length, length-1);
/* replicas should not initiate deletion of fields */ /* replicas should not initiate deletion of fields */
propagateHashFieldDeletion(exInfo->db, exInfo->key->ptr, field, sdslen(field)); propagateHashFieldDeletion(exInfo->db, exInfo->key->ptr, field, sdslen(field));
hashTypeDelete(exInfo->hashObj, field, 1); hashTypeDelete(exInfo->hashObj, field, 1);
@ -2132,6 +2165,7 @@ ebuckets *hashTypeGetDictMetaHFE(dict *d) {
*----------------------------------------------------------------------------*/ *----------------------------------------------------------------------------*/
void hsetnxCommand(client *c) { void hsetnxCommand(client *c) {
unsigned long hlen;
int isHashDeleted; int isHashDeleted;
robj *o; robj *o;
if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return; if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
@ -2152,6 +2186,8 @@ void hsetnxCommand(client *c) {
addReply(c, shared.cone); addReply(c, shared.cone);
signalModifiedKey(c,c->db,c->argv[1]); signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id);
hlen = hashTypeLength(o, 0);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_HASH, hlen - 1, hlen);
server.dirty++; server.dirty++;
} }
@ -2180,6 +2216,8 @@ void hsetCommand(client *c) {
addReply(c, shared.ok); addReply(c, shared.ok);
} }
signalModifiedKey(c,c->db,c->argv[1]); signalModifiedKey(c,c->db,c->argv[1]);
unsigned long l = hashTypeLength(o, 0);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_HASH, l - created, l);
notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id);
server.dirty += (c->argc - 2)/2; server.dirty += (c->argc - 2)/2;
} }
@ -2195,7 +2233,7 @@ void hincrbyCommand(client *c) {
if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return; if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
GetFieldRes res = hashTypeGetValue(c->db,o,c->argv[2]->ptr,&vstr,&vlen,&value, GetFieldRes res = hashTypeGetValue(c->db,o,c->argv[2]->ptr,&vstr,&vlen,&value,
HFE_LAZY_EXPIRE); HFE_LAZY_EXPIRE, NULL);
if (res == GETF_OK) { if (res == GETF_OK) {
if (vstr) { if (vstr) {
if (string2ll((char*)vstr,vlen,&value) == 0) { if (string2ll((char*)vstr,vlen,&value) == 0) {
@ -2205,11 +2243,14 @@ void hincrbyCommand(client *c) {
} /* Else hashTypeGetValue() already stored it into &value */ } /* Else hashTypeGetValue() already stored it into &value */
} else if ((res == GETF_NOT_FOUND) || (res == GETF_EXPIRED)) { } else if ((res == GETF_NOT_FOUND) || (res == GETF_EXPIRED)) {
value = 0; value = 0;
unsigned long l = hashTypeLength(o, 0);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_HASH, l, l + 1);
} else { } else {
/* Field expired and in turn hash deleted. Create new one! */ /* Field expired and in turn hash deleted. Create new one! */
o = createHashObject(); o = createHashObject();
dbAdd(c->db,c->argv[1],o); dbAdd(c->db,c->argv[1],o);
value = 0; value = 0;
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_HASH, 0, 1);
} }
oldvalue = value; oldvalue = value;
@ -2242,7 +2283,7 @@ void hincrbyfloatCommand(client *c) {
} }
if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return; if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
GetFieldRes res = hashTypeGetValue(c->db, o,c->argv[2]->ptr,&vstr,&vlen,&ll, GetFieldRes res = hashTypeGetValue(c->db, o,c->argv[2]->ptr,&vstr,&vlen,&ll,
HFE_LAZY_EXPIRE); HFE_LAZY_EXPIRE, NULL);
if (res == GETF_OK) { if (res == GETF_OK) {
if (vstr) { if (vstr) {
if (string2ld((char*)vstr,vlen,&value) == 0) { if (string2ld((char*)vstr,vlen,&value) == 0) {
@ -2254,11 +2295,14 @@ void hincrbyfloatCommand(client *c) {
} }
} else if ((res == GETF_NOT_FOUND) || (res == GETF_EXPIRED)) { } else if ((res == GETF_NOT_FOUND) || (res == GETF_EXPIRED)) {
value = 0; value = 0;
unsigned long l = hashTypeLength(o, 0);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_HASH, l, l + 1);
} else { } else {
/* Field expired and in turn hash deleted. Create new one! */ /* Field expired and in turn hash deleted. Create new one! */
o = createHashObject(); o = createHashObject();
dbAdd(c->db,c->argv[1],o); dbAdd(c->db,c->argv[1],o);
value = 0; value = 0;
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_HASH, 0, 1);
} }
value += incr; value += incr;
@ -2296,7 +2340,7 @@ static GetFieldRes addHashFieldToReply(client *c, robj *o, sds field, int hfeFla
unsigned int vlen = UINT_MAX; unsigned int vlen = UINT_MAX;
long long vll = LLONG_MAX; long long vll = LLONG_MAX;
GetFieldRes res = hashTypeGetValue(c->db, o, field, &vstr, &vlen, &vll, hfeFlags); GetFieldRes res = hashTypeGetValue(c->db, o, field, &vstr, &vlen, &vll, hfeFlags, NULL);
if (res == GETF_OK) { if (res == GETF_OK) {
if (vstr) { if (vstr) {
addReplyBulkCBuffer(c, vstr, vlen); addReplyBulkCBuffer(c, vstr, vlen);
@ -2356,6 +2400,8 @@ void hdelCommand(client *c) {
if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL || if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
checkType(c,o,OBJ_HASH)) return; checkType(c,o,OBJ_HASH)) return;
unsigned long oldLen = hashTypeLength(o, 0);
/* Hash field expiration is optimized to avoid frequent update global HFE DS for /* Hash field expiration is optimized to avoid frequent update global HFE DS for
* each field deletion. Eventually active-expiration will run and update or remove * each field deletion. Eventually active-expiration will run and update or remove
* the hash from global HFE DS gracefully. Nevertheless, statistic "subexpiry" * the hash from global HFE DS gracefully. Nevertheless, statistic "subexpiry"
@ -2375,6 +2421,8 @@ void hdelCommand(client *c) {
} }
} }
if (deleted) { if (deleted) {
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_HASH, oldLen, oldLen - deleted);
signalModifiedKey(c,c->db,c->argv[1]); signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_HASH,"hdel",c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_HASH,"hdel",c->argv[1],c->db->id);
if (keyremoved) { if (keyremoved) {
@ -2407,8 +2455,8 @@ void hstrlenCommand(client *c) {
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL || if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
checkType(c,o,OBJ_HASH)) return; checkType(c,o,OBJ_HASH)) return;
GetFieldRes res = hashTypeGetValue(c->db, o, c->argv[2]->ptr, &vstr, &vlen, &vll, GetFieldRes res = hashTypeGetValue(c->db, o, c->argv[2]->ptr, &vstr,
HFE_LAZY_EXPIRE); &vlen, &vll, HFE_LAZY_EXPIRE, NULL);
if (res == GETF_NOT_FOUND || res == GETF_EXPIRED || res == GETF_EXPIRED_HASH) { if (res == GETF_NOT_FOUND || res == GETF_EXPIRED || res == GETF_EXPIRED_HASH) {
addReply(c, shared.czero); addReply(c, shared.czero);
@ -2943,6 +2991,11 @@ static ExpireAction onFieldExpire(eItem item, void *ctx) {
dict *d = expCtx->hashObj->ptr; dict *d = expCtx->hashObj->ptr;
dictExpireMetadata *dictExpireMeta = (dictExpireMetadata *) dictMetadata(d); dictExpireMetadata *dictExpireMeta = (dictExpireMetadata *) dictMetadata(d);
propagateHashFieldDeletion(expCtx->db, dictExpireMeta->key, hf, hfieldlen(hf)); propagateHashFieldDeletion(expCtx->db, dictExpireMeta->key, hf, hfieldlen(hf));
/* update keysizes */
unsigned long l = hashTypeLength(expCtx->hashObj, 0);
updateKeysizesHist(expCtx->db, getKeySlot(dictExpireMeta->key), OBJ_HASH, l, l - 1);
serverAssert(hashTypeDelete(expCtx->hashObj, hf, 0) == 1); serverAssert(hashTypeDelete(expCtx->hashObj, hf, 0) == 1);
server.stat_expired_subkeys++; server.stat_expired_subkeys++;
return ACT_REMOVE_EXP_ITEM; return ACT_REMOVE_EXP_ITEM;

View File

@ -7,6 +7,7 @@
*/ */
#include "server.h" #include "server.h"
#include "util.h"
/*----------------------------------------------------------------------------- /*-----------------------------------------------------------------------------
* List API * List API
@ -462,6 +463,7 @@ void listTypeDelRange(robj *subject, long start, long count) {
/* Implements LPUSH/RPUSH/LPUSHX/RPUSHX. /* Implements LPUSH/RPUSH/LPUSHX/RPUSHX.
* 'xx': push if key exists. */ * 'xx': push if key exists. */
void pushGenericCommand(client *c, int where, int xx) { void pushGenericCommand(client *c, int where, int xx) {
unsigned long llen;
int j; int j;
robj *lobj = lookupKeyWrite(c->db, c->argv[1]); robj *lobj = lookupKeyWrite(c->db, c->argv[1]);
@ -482,11 +484,13 @@ void pushGenericCommand(client *c, int where, int xx) {
server.dirty++; server.dirty++;
} }
addReplyLongLong(c, listTypeLength(lobj)); llen = listTypeLength(lobj);
addReplyLongLong(c, llen);
char *event = (where == LIST_HEAD) ? "lpush" : "rpush"; char *event = (where == LIST_HEAD) ? "lpush" : "rpush";
signalModifiedKey(c,c->db,c->argv[1]); signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_LIST, llen - (c->argc - 2), llen);
} }
/* LPUSH <key> <element> [<element> ...] */ /* LPUSH <key> <element> [<element> ...] */
@ -553,6 +557,8 @@ void linsertCommand(client *c) {
notifyKeyspaceEvent(NOTIFY_LIST,"linsert", notifyKeyspaceEvent(NOTIFY_LIST,"linsert",
c->argv[1],c->db->id); c->argv[1],c->db->id);
server.dirty++; server.dirty++;
unsigned long ll = listTypeLength(subject);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_LIST, ll-1, ll);
} else { } else {
/* Notify client of a failed insert */ /* Notify client of a failed insert */
addReplyLongLong(c,-1); addReplyLongLong(c,-1);
@ -677,23 +683,20 @@ void addListQuicklistRangeReply(client *c, robj *o, int from, int rangelen, int
* Note that the purpose is to make the methods small so that the * Note that the purpose is to make the methods small so that the
* code in the loop can be inlined better to improve performance. */ * code in the loop can be inlined better to improve performance. */
void addListListpackRangeReply(client *c, robj *o, int from, int rangelen, int reverse) { void addListListpackRangeReply(client *c, robj *o, int from, int rangelen, int reverse) {
unsigned char *p = lpSeek(o->ptr, from); unsigned char *lp = o->ptr;
unsigned char *vstr; unsigned char *p = lpSeek(lp, from);
unsigned int vlen; const size_t lpbytes = lpBytes(lp);
long long lval; int64_t vlen;
/* Return the result in form of a multi-bulk reply */ /* Return the result in form of a multi-bulk reply */
addReplyArrayLen(c,rangelen); addReplyArrayLen(c,rangelen);
while(rangelen--) { while(rangelen--) {
serverAssert(p); /* fail on corrupt data */ serverAssert(p); /* fail on corrupt data */
vstr = lpGetValue(p, &vlen, &lval); unsigned char buf[LP_INTBUF_SIZE];
if (vstr) { unsigned char *vstr = lpGet(p,&vlen,buf);
addReplyBulkCBuffer(c,vstr,vlen); addReplyBulkCBuffer(c,vstr,vlen);
} else { p = reverse ? lpPrev(lp,p) : lpNextWithBytes(lp,p,lpbytes);
addReplyBulkLongLong(c,lval);
}
p = reverse ? lpPrev(o->ptr,p) : lpNext(o->ptr,p);
} }
} }
@ -736,9 +739,11 @@ void addListRangeReply(client *c, robj *o, long start, long end, int reverse) {
* if the key got deleted by this function. */ * if the key got deleted by this function. */
void listElementsRemoved(client *c, robj *key, int where, robj *o, long count, int signal, int *deleted) { void listElementsRemoved(client *c, robj *key, int where, robj *o, long count, int signal, int *deleted) {
char *event = (where == LIST_HEAD) ? "lpop" : "rpop"; char *event = (where == LIST_HEAD) ? "lpop" : "rpop";
unsigned long llen = listTypeLength(o);
notifyKeyspaceEvent(NOTIFY_LIST, event, key, c->db->id); notifyKeyspaceEvent(NOTIFY_LIST, event, key, c->db->id);
if (listTypeLength(o) == 0) { updateKeysizesHist(c->db, getKeySlot(key->ptr), OBJ_LIST, llen + count, llen);
if (llen == 0) {
if (deleted) *deleted = 1; if (deleted) *deleted = 1;
dbDelete(c->db, key); dbDelete(c->db, key);
@ -870,7 +875,7 @@ void lrangeCommand(client *c) {
/* LTRIM <key> <start> <stop> */ /* LTRIM <key> <start> <stop> */
void ltrimCommand(client *c) { void ltrimCommand(client *c) {
robj *o; robj *o;
long start, end, llen, ltrim, rtrim; long start, end, llen, ltrim, rtrim, llenNew;
if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != C_OK) || if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != C_OK) ||
(getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != C_OK)) return; (getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != C_OK)) return;
@ -908,12 +913,13 @@ void ltrimCommand(client *c) {
} }
notifyKeyspaceEvent(NOTIFY_LIST,"ltrim",c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_LIST,"ltrim",c->argv[1],c->db->id);
if (listTypeLength(o) == 0) { if ((llenNew = listTypeLength(o)) == 0) {
dbDelete(c->db,c->argv[1]); dbDelete(c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
} else { } else {
listTypeTryConversion(o,LIST_CONV_SHRINKING,NULL,NULL); listTypeTryConversion(o,LIST_CONV_SHRINKING,NULL,NULL);
} }
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_LIST, llen, llenNew);
signalModifiedKey(c,c->db,c->argv[1]); signalModifiedKey(c,c->db,c->argv[1]);
server.dirty += (ltrim + rtrim); server.dirty += (ltrim + rtrim);
addReply(c,shared.ok); addReply(c,shared.ok);
@ -1066,8 +1072,11 @@ void lremCommand(client *c) {
listTypeReleaseIterator(li); listTypeReleaseIterator(li);
if (removed) { if (removed) {
long ll = listTypeLength(subject);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_LIST, ll + removed, ll);
notifyKeyspaceEvent(NOTIFY_LIST,"lrem",c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_LIST,"lrem",c->argv[1],c->db->id);
if (listTypeLength(subject) == 0) {
if (ll == 0) {
dbDelete(c->db,c->argv[1]); dbDelete(c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
} else { } else {
@ -1089,6 +1098,10 @@ void lmoveHandlePush(client *c, robj *dstkey, robj *dstobj, robj *value,
listTypeTryConversionAppend(dstobj,&value,0,0,NULL,NULL); listTypeTryConversionAppend(dstobj,&value,0,0,NULL,NULL);
listTypePush(dstobj,value,where); listTypePush(dstobj,value,where);
signalModifiedKey(c,c->db,dstkey); signalModifiedKey(c,c->db,dstkey);
long ll = listTypeLength(dstobj);
updateKeysizesHist(c->db, getKeySlot(dstkey->ptr), OBJ_LIST, ll - 1, ll);
notifyKeyspaceEvent(NOTIFY_LIST, notifyKeyspaceEvent(NOTIFY_LIST,
where == LIST_HEAD ? "lpush" : "rpush", where == LIST_HEAD ? "lpush" : "rpush",
dstkey, dstkey,

View File

@ -603,6 +603,8 @@ void saddCommand(client *c) {
if (setTypeAdd(set,c->argv[j]->ptr)) added++; if (setTypeAdd(set,c->argv[j]->ptr)) added++;
} }
if (added) { if (added) {
unsigned long size = setTypeSize(set);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_SET, size - added, size);
signalModifiedKey(c,c->db,c->argv[1]); signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_SET,"sadd",c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_SET,"sadd",c->argv[1],c->db->id);
} }
@ -617,6 +619,8 @@ void sremCommand(client *c) {
if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL || if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
checkType(c,set,OBJ_SET)) return; checkType(c,set,OBJ_SET)) return;
unsigned long oldSize = setTypeSize(set);
for (j = 2; j < c->argc; j++) { for (j = 2; j < c->argc; j++) {
if (setTypeRemove(set,c->argv[j]->ptr)) { if (setTypeRemove(set,c->argv[j]->ptr)) {
deleted++; deleted++;
@ -628,6 +632,8 @@ void sremCommand(client *c) {
} }
} }
if (deleted) { if (deleted) {
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_SET, oldSize, oldSize - deleted);
signalModifiedKey(c,c->db,c->argv[1]); signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_SET,"srem",c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_SET,"srem",c->argv[1],c->db->id);
if (keyremoved) if (keyremoved)
@ -669,8 +675,12 @@ void smoveCommand(client *c) {
} }
notifyKeyspaceEvent(NOTIFY_SET,"srem",c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_SET,"srem",c->argv[1],c->db->id);
/* Update keysizes histogram */
unsigned long srcLen = setTypeSize(srcset);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_SET, srcLen + 1, srcLen);
/* Remove the src set from the database when empty */ /* Remove the src set from the database when empty */
if (setTypeSize(srcset) == 0) { if (srcLen == 0) {
dbDelete(c->db,c->argv[1]); dbDelete(c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
} }
@ -686,6 +696,8 @@ void smoveCommand(client *c) {
/* An extra key has changed when ele was successfully added to dstset */ /* An extra key has changed when ele was successfully added to dstset */
if (setTypeAdd(dstset,ele->ptr)) { if (setTypeAdd(dstset,ele->ptr)) {
unsigned long dstLen = setTypeSize(dstset);
updateKeysizesHist(c->db, getKeySlot(c->argv[2]->ptr), OBJ_SET, dstLen - 1, dstLen);
server.dirty++; server.dirty++;
signalModifiedKey(c,c->db,c->argv[2]); signalModifiedKey(c,c->db,c->argv[2]);
notifyKeyspaceEvent(NOTIFY_SET,"sadd",c->argv[2],c->db->id); notifyKeyspaceEvent(NOTIFY_SET,"sadd",c->argv[2],c->db->id);
@ -743,7 +755,7 @@ void scardCommand(client *c) {
void spopWithCountCommand(client *c) { void spopWithCountCommand(client *c) {
long l; long l;
unsigned long count, size; unsigned long count, size, toRemove;
robj *set; robj *set;
/* Get the count argument */ /* Get the count argument */
@ -763,10 +775,12 @@ void spopWithCountCommand(client *c) {
} }
size = setTypeSize(set); size = setTypeSize(set);
toRemove = (count >= size) ? size : count;
/* Generate an SPOP keyspace notification */ /* Generate an SPOP keyspace notification */
notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id);
server.dirty += (count >= size) ? size : count; server.dirty += toRemove;
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_SET, size, size - toRemove);
/* CASE 1: /* CASE 1:
* The number of requested elements is greater than or equal to * The number of requested elements is greater than or equal to
@ -949,6 +963,7 @@ void spopWithCountCommand(client *c) {
} }
void spopCommand(client *c) { void spopCommand(client *c) {
unsigned long size;
robj *set, *ele; robj *set, *ele;
if (c->argc == 3) { if (c->argc == 3) {
@ -964,6 +979,9 @@ void spopCommand(client *c) {
if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.null[c->resp])) if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.null[c->resp]))
== NULL || checkType(c,set,OBJ_SET)) return; == NULL || checkType(c,set,OBJ_SET)) return;
size = setTypeSize(set);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_SET, size, size-1);
/* Pop a random element from the set */ /* Pop a random element from the set */
ele = setTypePopRandom(set); ele = setTypePopRandom(set);

View File

@ -33,6 +33,7 @@
#define STREAM_LISTPACK_MAX_SIZE (1<<30) #define STREAM_LISTPACK_MAX_SIZE (1<<30)
void streamFreeCG(streamCG *cg); void streamFreeCG(streamCG *cg);
void streamFreeCGGeneric(void *cg);
void streamFreeNACK(streamNACK *na); void streamFreeNACK(streamNACK *na);
size_t streamReplyWithRangeFromConsumerPEL(client *c, stream *s, streamID *start, streamID *end, size_t count, streamConsumer *consumer); size_t streamReplyWithRangeFromConsumerPEL(client *c, stream *s, streamID *start, streamID *end, size_t count, streamConsumer *consumer);
int streamParseStrictIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq, int *seq_given); int streamParseStrictIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq, int *seq_given);
@ -60,9 +61,9 @@ stream *streamNew(void) {
/* Free a stream, including the listpacks stored inside the radix tree. */ /* Free a stream, including the listpacks stored inside the radix tree. */
void freeStream(stream *s) { void freeStream(stream *s) {
raxFreeWithCallback(s->rax,(void(*)(void*))lpFree); raxFreeWithCallback(s->rax, lpFreeGeneric);
if (s->cgroups) if (s->cgroups)
raxFreeWithCallback(s->cgroups,(void(*)(void*))streamFreeCG); raxFreeWithCallback(s->cgroups, streamFreeCGGeneric);
zfree(s); zfree(s);
} }
@ -241,7 +242,7 @@ robj *streamDup(robj *o) {
/* This is a wrapper function for lpGet() to directly get an integer value /* This is a wrapper function for lpGet() to directly get an integer value
* from the listpack (that may store numbers as a string), converting * from the listpack (that may store numbers as a string), converting
* the string if needed. * the string if needed.
* The 'valid" argument is an optional output parameter to get an indication * The 'valid' argument is an optional output parameter to get an indication
* if the record was valid, when this parameter is NULL, the function will * if the record was valid, when this parameter is NULL, the function will
* fail with an assertion. */ * fail with an assertion. */
static inline int64_t lpGetIntegerIfValid(unsigned char *ele, int *valid) { static inline int64_t lpGetIntegerIfValid(unsigned char *ele, int *valid) {
@ -1742,7 +1743,7 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end
/* Try to add a new NACK. Most of the time this will work and /* Try to add a new NACK. Most of the time this will work and
* will not require extra lookups. We'll fix the problem later * will not require extra lookups. We'll fix the problem later
* if we find that there is already a entry for this ID. */ * if we find that there is already an entry for this ID. */
streamNACK *nack = streamCreateNACK(consumer); streamNACK *nack = streamCreateNACK(consumer);
int group_inserted = int group_inserted =
raxTryInsert(group->pel,buf,sizeof(buf),nack,NULL); raxTryInsert(group->pel,buf,sizeof(buf),nack,NULL);
@ -1875,7 +1876,7 @@ robj *streamTypeLookupWriteOrCreate(client *c, robj *key, int no_create) {
* that can be represented. If 'strict' is set to 1, "-" and "+" will be * that can be represented. If 'strict' is set to 1, "-" and "+" will be
* treated as an invalid ID. * treated as an invalid ID.
* *
* The ID form <ms>-* specifies a millisconds-only ID, leaving the sequence part * The ID form <ms>-* specifies a milliseconds-only ID, leaving the sequence part
* to be autogenerated. When a non-NULL 'seq_given' argument is provided, this * to be autogenerated. When a non-NULL 'seq_given' argument is provided, this
* form is accepted and the argument is set to 0 unless the sequence part is * form is accepted and the argument is set to 0 unless the sequence part is
* specified. * specified.
@ -2478,6 +2479,11 @@ void streamFreeNACK(streamNACK *na) {
zfree(na); zfree(na);
} }
/* Generic version of streamFreeNACK. */
void streamFreeNACKGeneric(void *na) {
streamFreeNACK((streamNACK *)na);
}
/* Free a consumer and associated data structures. Note that this function /* Free a consumer and associated data structures. Note that this function
* will not reassign the pending messages associated with this consumer * will not reassign the pending messages associated with this consumer
* nor will delete them from the stream, so when this function is called * nor will delete them from the stream, so when this function is called
@ -2490,6 +2496,11 @@ void streamFreeConsumer(streamConsumer *sc) {
zfree(sc); zfree(sc);
} }
/* Generic version of streamFreeConsumer. */
void streamFreeConsumerGeneric(void *sc) {
streamFreeConsumer((streamConsumer *)sc);
}
/* Create a new consumer group in the context of the stream 's', having the /* Create a new consumer group in the context of the stream 's', having the
* specified name, last server ID and reads counter. If a consumer group with * specified name, last server ID and reads counter. If a consumer group with
* the same name already exists NULL is returned, otherwise the pointer to the * the same name already exists NULL is returned, otherwise the pointer to the
@ -2510,11 +2521,16 @@ streamCG *streamCreateCG(stream *s, char *name, size_t namelen, streamID *id, lo
/* Free a consumer group and all its associated data. */ /* Free a consumer group and all its associated data. */
void streamFreeCG(streamCG *cg) { void streamFreeCG(streamCG *cg) {
raxFreeWithCallback(cg->pel,(void(*)(void*))streamFreeNACK); raxFreeWithCallback(cg->pel, streamFreeNACKGeneric);
raxFreeWithCallback(cg->consumers,(void(*)(void*))streamFreeConsumer); raxFreeWithCallback(cg->consumers, streamFreeConsumerGeneric);
zfree(cg); zfree(cg);
} }
/* Generic version of streamFreeCG. */
void streamFreeCGGeneric(void *cg) {
streamFreeCG((streamCG *)cg);
}
/* Lookup the consumer group in the specified stream and returns its /* Lookup the consumer group in the specified stream and returns its
* pointer, otherwise if there is no such group, NULL is returned. */ * pointer, otherwise if there is no such group, NULL is returned. */
streamCG *streamLookupCG(stream *s, sds groupname) { streamCG *streamLookupCG(stream *s, sds groupname) {

View File

@ -21,7 +21,7 @@ static int checkStringLength(client *c, long long size, long long append) {
return C_OK; return C_OK;
/* 'uint64_t' cast is there just to prevent undefined behavior on overflow */ /* 'uint64_t' cast is there just to prevent undefined behavior on overflow */
long long total = (uint64_t)size + append; long long total = (uint64_t)size + append;
/* Test configured max-bulk-len represending a limit of the biggest string object, /* Test configured max-bulk-len representing a limit of the biggest string object,
* and also test for overflow. */ * and also test for overflow. */
if (total > server.proto_max_bulk_len || total < size || total < append) { if (total > server.proto_max_bulk_len || total < size || total < append) {
addReplyError(c,"string exceeds maximum allowed size (proto-max-bulk-len)"); addReplyError(c,"string exceeds maximum allowed size (proto-max-bulk-len)");
@ -61,7 +61,7 @@ static int checkStringLength(client *c, long long size, long long append) {
static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int unit, long long *milliseconds); static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int unit, long long *milliseconds);
void setGenericCommand(client *c, int flags, robj *key, robj *val, robj *expire, int unit, robj *ok_reply, robj *abort_reply) { void setGenericCommand(client *c, int flags, robj *key, robj *val, robj *expire, int unit, robj *ok_reply, robj *abort_reply) {
long long milliseconds = 0; /* initialized to avoid any harmness warning */ long long milliseconds = 0; /* initialized to avoid any harmless warning */
int found = 0; int found = 0;
int setkey_flags = 0; int setkey_flags = 0;
@ -420,6 +420,7 @@ void getsetCommand(client *c) {
} }
void setrangeCommand(client *c) { void setrangeCommand(client *c) {
size_t oldLen = 0, newLen;
robj *o; robj *o;
long offset; long offset;
sds value = c->argv[3]->ptr; sds value = c->argv[3]->ptr;
@ -449,16 +450,14 @@ void setrangeCommand(client *c) {
o = createObject(OBJ_STRING,sdsnewlen(NULL, offset+value_len)); o = createObject(OBJ_STRING,sdsnewlen(NULL, offset+value_len));
dbAdd(c->db,c->argv[1],o); dbAdd(c->db,c->argv[1],o);
} else { } else {
size_t olen;
/* Key exists, check type */ /* Key exists, check type */
if (checkType(c,o,OBJ_STRING)) if (checkType(c,o,OBJ_STRING))
return; return;
/* Return existing string length when setting nothing */ /* Return existing string length when setting nothing */
olen = stringObjectLen(o); oldLen = stringObjectLen(o);
if (value_len == 0) { if (value_len == 0) {
addReplyLongLong(c,olen); addReplyLongLong(c, oldLen);
return; return;
} }
@ -478,7 +477,10 @@ void setrangeCommand(client *c) {
"setrange",c->argv[1],c->db->id); "setrange",c->argv[1],c->db->id);
server.dirty++; server.dirty++;
} }
addReplyLongLong(c,sdslen(o->ptr));
newLen = sdslen(o->ptr);
updateKeysizesHist(c->db,getKeySlot(c->argv[1]->ptr),OBJ_STRING,oldLen,newLen);
addReplyLongLong(c,newLen);
} }
void getrangeCommand(client *c) { void getrangeCommand(client *c) {
@ -669,7 +671,7 @@ void incrbyfloatCommand(client *c) {
} }
void appendCommand(client *c) { void appendCommand(client *c) {
size_t totlen; size_t totlen, append_len;
robj *o, *append; robj *o, *append;
dictEntry *de; dictEntry *de;
@ -679,7 +681,7 @@ void appendCommand(client *c) {
c->argv[2] = tryObjectEncoding(c->argv[2]); c->argv[2] = tryObjectEncoding(c->argv[2]);
dbAdd(c->db,c->argv[1],c->argv[2]); dbAdd(c->db,c->argv[1],c->argv[2]);
incrRefCount(c->argv[2]); incrRefCount(c->argv[2]);
totlen = stringObjectLen(c->argv[2]); append_len = totlen = stringObjectLen(c->argv[2]);
} else { } else {
/* Key exists, check type */ /* Key exists, check type */
if (checkType(c,o,OBJ_STRING)) if (checkType(c,o,OBJ_STRING))
@ -687,7 +689,7 @@ void appendCommand(client *c) {
/* "append" is an argument, so always an sds */ /* "append" is an argument, so always an sds */
append = c->argv[2]; append = c->argv[2];
const size_t append_len = sdslen(append->ptr); append_len = sdslen(append->ptr);
if (checkStringLength(c,stringObjectLen(o),append_len) != C_OK) if (checkStringLength(c,stringObjectLen(o),append_len) != C_OK)
return; return;
@ -699,6 +701,7 @@ void appendCommand(client *c) {
signalModifiedKey(c,c->db,c->argv[1]); signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_STRING,"append",c->argv[1],c->db->id); notifyKeyspaceEvent(NOTIFY_STRING,"append",c->argv[1],c->db->id);
server.dirty++; server.dirty++;
updateKeysizesHist(c->db,getKeySlot(c->argv[1]->ptr),OBJ_STRING, totlen - append_len, totlen);
addReplyLongLong(c,totlen); addReplyLongLong(c,totlen);
} }

View File

@ -728,7 +728,7 @@ zskiplistNode *zslNthInLexRange(zskiplist *zsl, zlexrangespec *range, long n) {
x = x->level[0].forward; x = x->level[0].forward;
} }
} else { } else {
/* If offset is big, we caasn jump from the last zsl->level-1 node. */ /* If offset is big, we can jump from the last zsl->level-1 node. */
rank_diff = edge_rank + 1 + n - last_highest_level_rank; rank_diff = edge_rank + 1 + n - last_highest_level_rank;
x = zslGetElementByRankFromNode(last_highest_level_node, zsl->level - 1, rank_diff); x = zslGetElementByRankFromNode(last_highest_level_node, zsl->level - 1, rank_diff);
} }
@ -1843,6 +1843,7 @@ void zaddGenericCommand(client *c, int flags) {
zsetTypeMaybeConvert(zobj, elements); zsetTypeMaybeConvert(zobj, elements);
} }
unsigned long llen = zsetLength(zobj);
for (j = 0; j < elements; j++) { for (j = 0; j < elements; j++) {
double newscore; double newscore;
score = scores[j]; score = scores[j];
@ -1860,6 +1861,7 @@ void zaddGenericCommand(client *c, int flags) {
score = newscore; score = newscore;
} }
server.dirty += (added+updated); server.dirty += (added+updated);
updateKeysizesHist(c->db, getKeySlot(key->ptr), OBJ_ZSET, llen, llen+added);
reply_to_client: reply_to_client:
if (incr) { /* ZINCRBY or INCR option. */ if (incr) { /* ZINCRBY or INCR option. */
@ -1907,8 +1909,13 @@ void zremCommand(client *c) {
if (deleted) { if (deleted) {
notifyKeyspaceEvent(NOTIFY_ZSET,"zrem",key,c->db->id); notifyKeyspaceEvent(NOTIFY_ZSET,"zrem",key,c->db->id);
if (keyremoved) if (keyremoved) {
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id); notifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, c->db->id);
/* No need updateKeysizesHist(). dbDelete() done it already. */
} else {
unsigned long len = zsetLength(zobj);
updateKeysizesHist(c->db, getKeySlot(key->ptr), OBJ_ZSET, len + deleted, len);
}
signalModifiedKey(c,c->db,key); signalModifiedKey(c,c->db,key);
server.dirty += deleted; server.dirty += deleted;
} }
@ -2023,8 +2030,13 @@ void zremrangeGenericCommand(client *c, zrange_type rangetype) {
if (deleted) { if (deleted) {
signalModifiedKey(c,c->db,key); signalModifiedKey(c,c->db,key);
notifyKeyspaceEvent(NOTIFY_ZSET,notify_type,key,c->db->id); notifyKeyspaceEvent(NOTIFY_ZSET,notify_type,key,c->db->id);
if (keyremoved) if (keyremoved) {
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id); notifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, c->db->id);
/* No need updateKeysizesHist(). dbDelete() done it already. */
} else {
unsigned long len = zsetLength(zobj);
updateKeysizesHist(c->db, getKeySlot(key->ptr), OBJ_ZSET, len + deleted, len);
}
} }
server.dirty += deleted; server.dirty += deleted;
addReplyLongLong(c,deleted); addReplyLongLong(c,deleted);
@ -4031,6 +4043,9 @@ void genericZpopCommand(client *c, robj **keyv, int keyc, int where, int emitkey
dbDelete(c->db,key); dbDelete(c->db,key);
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id); notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
/* No need updateKeysizesHist(). dbDelete() done it already. */
} else {
updateKeysizesHist(c->db, getKeySlot(key->ptr), OBJ_ZSET, llen, llen - result_count);
} }
signalModifiedKey(c,c->db,key); signalModifiedKey(c,c->db,key);

114
src/tls.c
View File

@ -75,10 +75,6 @@ static int parseProtocolsConfig(const char *str) {
return protocols; return protocols;
} }
/* list of connections with pending data already read from the socket, but not
* served to the reader yet. */
static list *pending_list = NULL;
/** /**
* OpenSSL global initialization and locking handling callbacks. * OpenSSL global initialization and locking handling callbacks.
* Note that this is only required for OpenSSL < 1.1.0. * Note that this is only required for OpenSSL < 1.1.0.
@ -144,8 +140,6 @@ static void tlsInit(void) {
if (!RAND_poll()) { if (!RAND_poll()) {
serverLog(LL_WARNING, "OpenSSL: Failed to seed random number generator."); serverLog(LL_WARNING, "OpenSSL: Failed to seed random number generator.");
} }
pending_list = listCreate();
} }
static void tlsCleanup(void) { static void tlsCleanup(void) {
@ -435,20 +429,21 @@ typedef struct tls_connection {
listNode *pending_list_node; listNode *pending_list_node;
} tls_connection; } tls_connection;
static connection *createTLSConnection(int client_side) { static connection *createTLSConnection(struct aeEventLoop *el, int client_side) {
SSL_CTX *ctx = redis_tls_ctx; SSL_CTX *ctx = redis_tls_ctx;
if (client_side && redis_tls_client_ctx) if (client_side && redis_tls_client_ctx)
ctx = redis_tls_client_ctx; ctx = redis_tls_client_ctx;
tls_connection *conn = zcalloc(sizeof(tls_connection)); tls_connection *conn = zcalloc(sizeof(tls_connection));
conn->c.type = &CT_TLS; conn->c.type = &CT_TLS;
conn->c.fd = -1; conn->c.fd = -1;
conn->c.el = el;
conn->c.iovcnt = IOV_MAX; conn->c.iovcnt = IOV_MAX;
conn->ssl = SSL_new(ctx); conn->ssl = SSL_new(ctx);
return (connection *) conn; return (connection *) conn;
} }
static connection *connCreateTLS(void) { static connection *connCreateTLS(struct aeEventLoop *el) {
return createTLSConnection(1); return createTLSConnection(el, 1);
} }
/* Fetch the latest OpenSSL error and store it in the connection */ /* Fetch the latest OpenSSL error and store it in the connection */
@ -468,10 +463,11 @@ static void updateTLSError(tls_connection *conn) {
* Callers should use connGetState() and verify the created connection * Callers should use connGetState() and verify the created connection
* is not in an error state. * is not in an error state.
*/ */
static connection *connCreateAcceptedTLS(int fd, void *priv) { static connection *connCreateAcceptedTLS(struct aeEventLoop *el, int fd, void *priv) {
int require_auth = *(int *)priv; int require_auth = *(int *)priv;
tls_connection *conn = (tls_connection *) createTLSConnection(0); tls_connection *conn = (tls_connection *) createTLSConnection(el, 0);
conn->c.fd = fd; conn->c.fd = fd;
conn->c.el = el;
conn->c.state = CONN_STATE_ACCEPTING; conn->c.state = CONN_STATE_ACCEPTING;
if (!conn->ssl) { if (!conn->ssl) {
@ -575,17 +571,17 @@ static int updateStateAfterSSLIO(tls_connection *conn, int ret_value, int update
} }
static void registerSSLEvent(tls_connection *conn, WantIOType want) { static void registerSSLEvent(tls_connection *conn, WantIOType want) {
int mask = aeGetFileEvents(server.el, conn->c.fd); int mask = aeGetFileEvents(conn->c.el, conn->c.fd);
switch (want) { switch (want) {
case WANT_READ: case WANT_READ:
if (mask & AE_WRITABLE) aeDeleteFileEvent(server.el, conn->c.fd, AE_WRITABLE); if (mask & AE_WRITABLE) aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_WRITABLE);
if (!(mask & AE_READABLE)) aeCreateFileEvent(server.el, conn->c.fd, AE_READABLE, if (!(mask & AE_READABLE)) aeCreateFileEvent(conn->c.el, conn->c.fd, AE_READABLE,
tlsEventHandler, conn); tlsEventHandler, conn);
break; break;
case WANT_WRITE: case WANT_WRITE:
if (mask & AE_READABLE) aeDeleteFileEvent(server.el, conn->c.fd, AE_READABLE); if (mask & AE_READABLE) aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_READABLE);
if (!(mask & AE_WRITABLE)) aeCreateFileEvent(server.el, conn->c.fd, AE_WRITABLE, if (!(mask & AE_WRITABLE)) aeCreateFileEvent(conn->c.el, conn->c.fd, AE_WRITABLE,
tlsEventHandler, conn); tlsEventHandler, conn);
break; break;
default: default:
@ -595,19 +591,42 @@ static void registerSSLEvent(tls_connection *conn, WantIOType want) {
} }
static void updateSSLEvent(tls_connection *conn) { static void updateSSLEvent(tls_connection *conn) {
int mask = aeGetFileEvents(server.el, conn->c.fd); serverAssert(conn->c.el);
int mask = aeGetFileEvents(conn->c.el, conn->c.fd);
int need_read = conn->c.read_handler || (conn->flags & TLS_CONN_FLAG_WRITE_WANT_READ); int need_read = conn->c.read_handler || (conn->flags & TLS_CONN_FLAG_WRITE_WANT_READ);
int need_write = conn->c.write_handler || (conn->flags & TLS_CONN_FLAG_READ_WANT_WRITE); int need_write = conn->c.write_handler || (conn->flags & TLS_CONN_FLAG_READ_WANT_WRITE);
if (need_read && !(mask & AE_READABLE)) if (need_read && !(mask & AE_READABLE))
aeCreateFileEvent(server.el, conn->c.fd, AE_READABLE, tlsEventHandler, conn); aeCreateFileEvent(conn->c.el, conn->c.fd, AE_READABLE, tlsEventHandler, conn);
if (!need_read && (mask & AE_READABLE)) if (!need_read && (mask & AE_READABLE))
aeDeleteFileEvent(server.el, conn->c.fd, AE_READABLE); aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_READABLE);
if (need_write && !(mask & AE_WRITABLE)) if (need_write && !(mask & AE_WRITABLE))
aeCreateFileEvent(server.el, conn->c.fd, AE_WRITABLE, tlsEventHandler, conn); aeCreateFileEvent(conn->c.el, conn->c.fd, AE_WRITABLE, tlsEventHandler, conn);
if (!need_write && (mask & AE_WRITABLE)) if (!need_write && (mask & AE_WRITABLE))
aeDeleteFileEvent(server.el, conn->c.fd, AE_WRITABLE); aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_WRITABLE);
}
/* Add a connection to the list of connections with pending data that has
* already been read from the socket but has not yet been served to the reader. */
static void tlsPendingAdd(tls_connection *conn) {
if (!conn->c.el->privdata[1])
conn->c.el->privdata[1] = listCreate();
list *pending_list = conn->c.el->privdata[1];
if (!conn->pending_list_node) {
listAddNodeTail(pending_list, conn);
conn->pending_list_node = listLast(pending_list);
}
}
/* Removes a connection from the list of connections with pending data. */
static void tlsPendingRemove(tls_connection *conn) {
if (conn->pending_list_node) {
list *pending_list = conn->c.el->privdata[1];
listDelNode(pending_list, conn->pending_list_node);
conn->pending_list_node = NULL;
}
} }
static void tlsHandleEvent(tls_connection *conn, int mask) { static void tlsHandleEvent(tls_connection *conn, int mask) {
@ -718,13 +737,9 @@ static void tlsHandleEvent(tls_connection *conn, int mask) {
* to a list of pending connection that should be handled anyway. */ * to a list of pending connection that should be handled anyway. */
if ((mask & AE_READABLE)) { if ((mask & AE_READABLE)) {
if (SSL_pending(conn->ssl) > 0) { if (SSL_pending(conn->ssl) > 0) {
if (!conn->pending_list_node) { tlsPendingAdd(conn);
listAddNodeTail(pending_list, conn);
conn->pending_list_node = listLast(pending_list);
}
} else if (conn->pending_list_node) { } else if (conn->pending_list_node) {
listDelNode(pending_list, conn->pending_list_node); tlsPendingRemove(conn);
conn->pending_list_node = NULL;
} }
} }
@ -734,7 +749,8 @@ static void tlsHandleEvent(tls_connection *conn, int mask) {
break; break;
} }
updateSSLEvent(conn); /* The event loop may have been unbound during the event processing above. */
if (conn->c.el) updateSSLEvent(conn);
} }
static void tlsEventHandler(struct aeEventLoop *el, int fd, void *clientData, int mask) { static void tlsEventHandler(struct aeEventLoop *el, int fd, void *clientData, int mask) {
@ -748,7 +764,6 @@ static void tlsAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask)
int cport, cfd; int cport, cfd;
int max = server.max_new_tls_conns_per_cycle; int max = server.max_new_tls_conns_per_cycle;
char cip[NET_IP_STR_LEN]; char cip[NET_IP_STR_LEN];
UNUSED(el);
UNUSED(mask); UNUSED(mask);
UNUSED(privdata); UNUSED(privdata);
@ -761,7 +776,7 @@ static void tlsAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask)
return; return;
} }
serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport); serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
acceptCommonHandler(connCreateAcceptedTLS(cfd, &server.tls_auth_clients),0,cip); acceptCommonHandler(connCreateAcceptedTLS(el,cfd,&server.tls_auth_clients), 0, cip);
} }
} }
@ -806,6 +821,7 @@ static void connTLSClose(connection *conn_) {
} }
if (conn->pending_list_node) { if (conn->pending_list_node) {
list *pending_list = conn->c.el->privdata[1];
listDelNode(pending_list, conn->pending_list_node); listDelNode(pending_list, conn->pending_list_node);
conn->pending_list_node = NULL; conn->pending_list_node = NULL;
} }
@ -863,6 +879,33 @@ static int connTLSConnect(connection *conn_, const char *addr, int port, const c
return C_OK; return C_OK;
} }
static void connTLSUnbindEventLoop(connection *conn_) {
tls_connection *conn = (tls_connection *) conn_;
/* We need to remove all events from the old event loop. The subsequent
* updateSSLEvent() will add the appropriate events to the new event loop. */
if (conn->c.el) {
int mask = aeGetFileEvents(conn->c.el, conn->c.fd);
if (mask & AE_READABLE) aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_READABLE);
if (mask & AE_WRITABLE) aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_WRITABLE);
/* Check if there are pending events and handle accordingly. */
int has_pending = conn->pending_list_node != NULL;
if (has_pending) tlsPendingRemove(conn);
}
}
static int connTLSRebindEventLoop(connection *conn_, aeEventLoop *el) {
tls_connection *conn = (tls_connection *) conn_;
serverAssert(!conn->c.el && !conn->c.read_handler &&
!conn->c.write_handler && !conn->pending_list_node);
conn->c.el = el;
if (el && SSL_pending(conn->ssl)) tlsPendingAdd(conn);
/* Add the appropriate events to the new event loop. */
updateSSLEvent((tls_connection *) conn);
return C_OK;
}
static int connTLSWrite(connection *conn_, const void *data, size_t data_len) { static int connTLSWrite(connection *conn_, const void *data, size_t data_len) {
tls_connection *conn = (tls_connection *) conn_; tls_connection *conn = (tls_connection *) conn_;
int ret; int ret;
@ -1044,16 +1087,19 @@ static const char *connTLSGetType(connection *conn_) {
return CONN_TYPE_TLS; return CONN_TYPE_TLS;
} }
static int tlsHasPendingData(void) { static int tlsHasPendingData(struct aeEventLoop *el) {
list *pending_list = el->privdata[1];
if (!pending_list) if (!pending_list)
return 0; return 0;
return listLength(pending_list) > 0; return listLength(pending_list) > 0;
} }
static int tlsProcessPendingData(void) { static int tlsProcessPendingData(struct aeEventLoop *el) {
listIter li; listIter li;
listNode *ln; listNode *ln;
list *pending_list = el->privdata[1];
if (!pending_list) return 0;
int processed = listLength(pending_list); int processed = listLength(pending_list);
listRewind(pending_list,&li); listRewind(pending_list,&li);
while((ln = listNext(&li))) { while((ln = listNext(&li))) {
@ -1114,6 +1160,10 @@ static ConnectionType CT_TLS = {
.blocking_connect = connTLSBlockingConnect, .blocking_connect = connTLSBlockingConnect,
.accept = connTLSAccept, .accept = connTLSAccept,
/* event loop */
.unbind_event_loop = connTLSUnbindEventLoop,
.rebind_event_loop = connTLSRebindEventLoop,
/* IO */ /* IO */
.read = connTLSRead, .read = connTLSRead,
.write = connTLSWrite, .write = connTLSWrite,

View File

@ -253,6 +253,7 @@ void trackingRememberKeys(client *tracking, client *executing) {
* - Following a flush command, to send a single RESP NULL to indicate * - Following a flush command, to send a single RESP NULL to indicate
* that all keys are now invalid. */ * that all keys are now invalid. */
void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) { void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
int paused = 0;
uint64_t old_flags = c->flags; uint64_t old_flags = c->flags;
c->flags |= CLIENT_PUSHING; c->flags |= CLIENT_PUSHING;
@ -275,6 +276,11 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING; if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
c = redir; c = redir;
using_redirection = 1; using_redirection = 1;
/* Start to touch another client data. */
if (c->running_tid != IOTHREAD_MAIN_THREAD_ID) {
pauseIOThread(c->running_tid);
paused = 1;
}
old_flags = c->flags; old_flags = c->flags;
c->flags |= CLIENT_PUSHING; c->flags |= CLIENT_PUSHING;
} }
@ -296,7 +302,7 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
* it since RESP2 does not support push messages in the same * it since RESP2 does not support push messages in the same
* connection. */ * connection. */
if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING; if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
return; goto done;
} }
/* Send the "value" part, which is the array of keys. */ /* Send the "value" part, which is the array of keys. */
@ -308,6 +314,17 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
} }
updateClientMemUsageAndBucket(c); updateClientMemUsageAndBucket(c);
if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING; if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
done:
if (paused) {
if (clientHasPendingReplies(c)) {
serverAssert(!(c->flags & CLIENT_PENDING_WRITE));
/* Actually we install write handler of client which is in IO thread
* event loop, it is safe since the io thread is paused */
connSetWriteHandler(c->conn, sendReplyToClient);
}
resumeIOThread(c->running_tid);
}
} }
/* This function is called when a key is modified in Redis and in the case /* This function is called when a key is modified in Redis and in the case

View File

@ -74,18 +74,19 @@ static int connUnixListen(connListener *listener) {
return C_OK; return C_OK;
} }
static connection *connCreateUnix(void) { static connection *connCreateUnix(struct aeEventLoop *el) {
connection *conn = zcalloc(sizeof(connection)); connection *conn = zcalloc(sizeof(connection));
conn->type = &CT_Unix; conn->type = &CT_Unix;
conn->fd = -1; conn->fd = -1;
conn->iovcnt = IOV_MAX; conn->iovcnt = IOV_MAX;
conn->el = el;
return conn; return conn;
} }
static connection *connCreateAcceptedUnix(int fd, void *priv) { static connection *connCreateAcceptedUnix(struct aeEventLoop *el, int fd, void *priv) {
UNUSED(priv); UNUSED(priv);
connection *conn = connCreateUnix(); connection *conn = connCreateUnix(el);
conn->fd = fd; conn->fd = fd;
conn->state = CONN_STATE_ACCEPTING; conn->state = CONN_STATE_ACCEPTING;
return conn; return conn;
@ -107,7 +108,7 @@ static void connUnixAcceptHandler(aeEventLoop *el, int fd, void *privdata, int m
return; return;
} }
serverLog(LL_VERBOSE,"Accepted connection to %s", server.unixsocket); serverLog(LL_VERBOSE,"Accepted connection to %s", server.unixsocket);
acceptCommonHandler(connCreateAcceptedUnix(cfd, NULL),CLIENT_UNIX_SOCKET,NULL); acceptCommonHandler(connCreateAcceptedUnix(el, cfd, NULL),CLIENT_UNIX_SOCKET,NULL);
} }
} }
@ -123,6 +124,10 @@ static int connUnixAccept(connection *conn, ConnectionCallbackFunc accept_handle
return connectionTypeTcp()->accept(conn, accept_handler); return connectionTypeTcp()->accept(conn, accept_handler);
} }
static int connUnixRebindEventLoop(connection *conn, aeEventLoop *el) {
return connectionTypeTcp()->rebind_event_loop(conn, el);
}
static int connUnixWrite(connection *conn, const void *data, size_t data_len) { static int connUnixWrite(connection *conn, const void *data, size_t data_len) {
return connectionTypeTcp()->write(conn, data, data_len); return connectionTypeTcp()->write(conn, data, data_len);
} }
@ -186,6 +191,10 @@ static ConnectionType CT_Unix = {
.blocking_connect = NULL, .blocking_connect = NULL,
.accept = connUnixAccept, .accept = connUnixAccept,
/* event loop */
.unbind_event_loop = NULL,
.rebind_event_loop = connUnixRebindEventLoop,
/* IO */ /* IO */
.write = connUnixWrite, .write = connUnixWrite,
.writev = connUnixWritev, .writev = connUnixWritev,

View File

@ -54,6 +54,13 @@
#define UNUSED(x) ((void)(x)) #define UNUSED(x) ((void)(x))
/* Selectively define static_assert. Attempt to avoid include server.h in this file. */
#ifndef static_assert
#define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 1:-1]
#endif
static_assert(UINTPTR_MAX == 0xffffffffffffffff || UINTPTR_MAX == 0xffffffff, "Unsupported pointer size");
/* Glob-style pattern matching. */ /* Glob-style pattern matching. */
static int stringmatchlen_impl(const char *pattern, int patternLen, static int stringmatchlen_impl(const char *pattern, int patternLen,
const char *string, int stringLen, int nocase, int *skipLongerMatches, int nesting) const char *string, int stringLen, int nocase, int *skipLongerMatches, int nesting)
@ -102,24 +109,24 @@ static int stringmatchlen_impl(const char *pattern, int patternLen,
pattern++; pattern++;
patternLen--; patternLen--;
not = pattern[0] == '^'; not = patternLen && pattern[0] == '^';
if (not) { if (not) {
pattern++; pattern++;
patternLen--; patternLen--;
} }
match = 0; match = 0;
while(1) { while(1) {
if (pattern[0] == '\\' && patternLen >= 2) { if (patternLen >= 2 && pattern[0] == '\\') {
pattern++; pattern++;
patternLen--; patternLen--;
if (pattern[0] == string[0]) if (pattern[0] == string[0])
match = 1; match = 1;
} else if (pattern[0] == ']') {
break;
} else if (patternLen == 0) { } else if (patternLen == 0) {
pattern--; pattern--;
patternLen++; patternLen++;
break; break;
} else if (pattern[0] == ']') {
break;
} else if (patternLen >= 3 && pattern[1] == '-') { } else if (patternLen >= 3 && pattern[1] == '-') {
int start = pattern[0]; int start = pattern[0];
int end = pattern[2]; int end = pattern[2];
@ -179,7 +186,7 @@ static int stringmatchlen_impl(const char *pattern, int patternLen,
pattern++; pattern++;
patternLen--; patternLen--;
if (stringLen == 0) { if (stringLen == 0) {
while(*pattern == '*') { while(patternLen && *pattern == '*') {
pattern++; pattern++;
patternLen--; patternLen--;
} }
@ -191,6 +198,43 @@ static int stringmatchlen_impl(const char *pattern, int patternLen,
return 0; return 0;
} }
/*
* glob-style pattern matching to check if a given pattern fully includes
* the prefix of a string. For the match to succeed, the pattern must end with
* an unescaped '*' character.
*
* Returns: 1 if the `pattern` fully matches the `prefixStr`. Returns 0 otherwise.
*/
int prefixmatch(const char *pattern, int patternLen,
const char *prefixStr, int prefixStrLen, int nocase) {
int skipLongerMatches = 0;
/* Step 1: Verify if the pattern matches the prefix string completely. */
if (!stringmatchlen_impl(pattern, patternLen, prefixStr, prefixStrLen, nocase, &skipLongerMatches, 0))
return 0;
/* Step 2: Verify that the pattern ends with an unescaped '*', indicating
* it can match any suffix of the string beyond the prefix. This check
* remains outside stringmatchlen_impl() to keep its complexity manageable.
*/
if (patternLen == 0 || pattern[patternLen - 1] != '*' )
return 0;
/* Count backward the number of consecutive backslashes preceding the '*'
* to determine if the '*' is escaped. */
int backslashCount = 0;
for (int i = patternLen - 2; i >= 0; i--) {
if (pattern[i] == '\\')
++backslashCount;
else
break; /* Stop counting when a non-backslash character is found. */
}
/* Return 1 if the '*' is not escaped (i.e., even count), 0 otherwise. */
return (backslashCount % 2 == 0);
}
/* Glob-style pattern matching to a string. */
int stringmatchlen(const char *pattern, int patternLen, int stringmatchlen(const char *pattern, int patternLen,
const char *string, int stringLen, int nocase) { const char *string, int stringLen, int nocase) {
int skipLongerMatches = 0; int skipLongerMatches = 0;

View File

@ -36,6 +36,8 @@ typedef enum {
LD_STR_HEX /* %La */ LD_STR_HEX /* %La */
} ld2string_mode; } ld2string_mode;
int prefixmatch(const char *pattern, int patternLen, const char *prefixStr,
int prefixStrLen, int nocase);
int stringmatchlen(const char *p, int plen, const char *s, int slen, int nocase); int stringmatchlen(const char *p, int plen, const char *s, int slen, int nocase);
int stringmatch(const char *p, const char *s, int nocase); int stringmatch(const char *p, const char *s, int nocase);
int stringmatchlen_fuzz_test(void); int stringmatchlen_fuzz_test(void);
@ -79,6 +81,19 @@ int snprintf_async_signal_safe(char *to, size_t n, const char *fmt, ...);
size_t redis_strlcpy(char *dst, const char *src, size_t dsize); size_t redis_strlcpy(char *dst, const char *src, size_t dsize);
size_t redis_strlcat(char *dst, const char *src, size_t dsize); size_t redis_strlcat(char *dst, const char *src, size_t dsize);
/* to keep it opt without conditions Works only for: 0 < x < 2^63 */
static inline int log2ceil(size_t x) {
#if UINTPTR_MAX == 0xffffffffffffffff
return 63 - __builtin_clzll(x);
#else
return 31 - __builtin_clz(x);
#endif
}
#ifndef static_assert
#define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 1:-1]
#endif
#ifdef REDIS_TEST #ifdef REDIS_TEST
int utilTest(int argc, char **argv, int flags); int utilTest(int argc, char **argv, int flags);
#endif #endif

View File

@ -2367,7 +2367,7 @@ int ziplistTest(int argc, char **argv, int flags) {
for (i = 0; i < iteration; i++) { for (i = 0; i < iteration; i++) {
zl = ziplistNew(); zl = ziplistNew();
ref = listCreate(); ref = listCreate();
listSetFreeMethod(ref,(void (*)(void*))sdsfree); listSetFreeMethod(ref, sdsfreegeneric);
len = rand() % 256; len = rand() % 256;
/* Create lists */ /* Create lists */

View File

@ -46,7 +46,7 @@ test "Resharding all the master #0 slots away from it" {
} }
test "Master #0 who lost all slots should turn into a replica without replicas" { test "Master #0 who lost all slots should turn into a replica without replicas" {
wait_for_condition 1000 50 { wait_for_condition 2000 50 {
[RI 0 role] == "slave" && [RI 0 connected_slaves] == 0 [RI 0 role] == "slave" && [RI 0 connected_slaves] == 0
} else { } else {
puts [R 0 info replication] puts [R 0 info replication]

View File

@ -1,18 +1,37 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
source tests/support/redis.tcl source tests/support/redis.tcl
set ::tlsdir "tests/tls" set ::tlsdir "tests/tls"
proc gen_write_load {host port seconds tls} { # Continuously sends SET commands to the server. If key is omitted, a random key
# is used for every SET command. The value is always random.
proc gen_write_load {host port seconds tls {key ""}} {
set start_time [clock seconds] set start_time [clock seconds]
set r [redis $host $port 1 $tls] set r [redis $host $port 1 $tls]
$r client setname LOAD_HANDLER $r client setname LOAD_HANDLER
$r select 9 $r select 9
while 1 { while 1 {
$r set [expr rand()] [expr rand()] if {$key == ""} {
$r set [expr rand()] [expr rand()]
} else {
$r set $key [expr rand()]
}
if {[clock seconds]-$start_time > $seconds} { if {[clock seconds]-$start_time > $seconds} {
exit 0 exit 0
} }
} }
} }
gen_write_load [lindex $argv 0] [lindex $argv 1] [lindex $argv 2] [lindex $argv 3] gen_write_load [lindex $argv 0] [lindex $argv 1] [lindex $argv 2] [lindex $argv 3] [lindex $argv 4]

View File

@ -1,6 +1,20 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
# This test group aims to test that all replicas share one global replication buffer, # This test group aims to test that all replicas share one global replication buffer,
# two replicas don't make replication buffer size double, and when there is no replica, # two replicas don't make replication buffer size double, and when there is no replica,
# replica buffer will shrink. # replica buffer will shrink.
foreach rdbchannel {"yes" "no"} {
start_server {tags {"repl external:skip"}} { start_server {tags {"repl external:skip"}} {
start_server {} { start_server {} {
start_server {} { start_server {} {
@ -9,6 +23,10 @@ start_server {} {
set replica2 [srv -2 client] set replica2 [srv -2 client]
set replica3 [srv -1 client] set replica3 [srv -1 client]
$replica1 config set repl-rdb-channel $rdbchannel
$replica2 config set repl-rdb-channel $rdbchannel
$replica3 config set repl-rdb-channel $rdbchannel
set master [srv 0 client] set master [srv 0 client]
set master_host [srv 0 host] set master_host [srv 0 host]
set master_port [srv 0 port] set master_port [srv 0 port]
@ -18,6 +36,7 @@ start_server {} {
$master config set repl-diskless-sync-delay 5 $master config set repl-diskless-sync-delay 5
$master config set repl-diskless-sync-max-replicas 1 $master config set repl-diskless-sync-max-replicas 1
$master config set client-output-buffer-limit "replica 0 0 0" $master config set client-output-buffer-limit "replica 0 0 0"
$master config set repl-rdb-channel $rdbchannel
# Make sure replica3 is synchronized with master # Make sure replica3 is synchronized with master
$replica3 replicaof $master_host $master_port $replica3 replicaof $master_host $master_port
@ -39,7 +58,7 @@ start_server {} {
fail "fail to sync with replicas" fail "fail to sync with replicas"
} }
test {All replicas share one global replication buffer} { test "All replicas share one global replication buffer rdbchannel=$rdbchannel" {
set before_used [s used_memory] set before_used [s used_memory]
populate 1024 "" 1024 ; # Write extra 1M data populate 1024 "" 1024 ; # Write extra 1M data
# New data uses 1M memory, but all replicas use only one # New data uses 1M memory, but all replicas use only one
@ -47,7 +66,13 @@ start_server {} {
# more than double of replication buffer. # more than double of replication buffer.
set repl_buf_mem [s mem_total_replication_buffers] set repl_buf_mem [s mem_total_replication_buffers]
set extra_mem [expr {[s used_memory]-$before_used-1024*1024}] set extra_mem [expr {[s used_memory]-$before_used-1024*1024}]
assert {$extra_mem < 2*$repl_buf_mem} if {$rdbchannel == "yes"} {
# master's replication buffers should not grow
assert {$extra_mem < 1024*1024}
assert {$repl_buf_mem < 1024*1024}
} else {
assert {$extra_mem < 2*$repl_buf_mem}
}
# Kill replica1, replication_buffer will not become smaller # Kill replica1, replication_buffer will not become smaller
catch {$replica1 shutdown nosave} catch {$replica1 shutdown nosave}
@ -59,7 +84,7 @@ start_server {} {
assert_equal $repl_buf_mem [s mem_total_replication_buffers] assert_equal $repl_buf_mem [s mem_total_replication_buffers]
} }
test {Replication buffer will become smaller when no replica uses} { test "Replication buffer will become smaller when no replica uses rdbchannel=$rdbchannel" {
# Make sure replica3 catch up with the master # Make sure replica3 catch up with the master
wait_for_ofs_sync $master $replica3 wait_for_ofs_sync $master $replica3
@ -71,12 +96,18 @@ start_server {} {
} else { } else {
fail "replica2 doesn't disconnect with master" fail "replica2 doesn't disconnect with master"
} }
assert {[expr $repl_buf_mem - 1024*1024] > [s mem_total_replication_buffers]} if {$rdbchannel == "yes"} {
# master's replication buffers should not grow
assert {1024*512 > [s mem_total_replication_buffers]}
} else {
assert {[expr $repl_buf_mem - 1024*1024] > [s mem_total_replication_buffers]}
}
} }
} }
} }
} }
} }
}
# This test group aims to test replication backlog size can outgrow the backlog # This test group aims to test replication backlog size can outgrow the backlog
# limit config if there is a slow replica which keep massive replication buffers, # limit config if there is a slow replica which keep massive replication buffers,
@ -84,6 +115,7 @@ start_server {} {
# partial re-synchronization. Of course, replication backlog memory also can # partial re-synchronization. Of course, replication backlog memory also can
# become smaller when master disconnects with slow replicas since output buffer # become smaller when master disconnects with slow replicas since output buffer
# limit is reached. # limit is reached.
foreach rdbchannel {"yes" "no"} {
start_server {tags {"repl external:skip"}} { start_server {tags {"repl external:skip"}} {
start_server {} { start_server {} {
start_server {} { start_server {} {
@ -98,6 +130,7 @@ start_server {} {
$master config set save "" $master config set save ""
$master config set repl-backlog-size 16384 $master config set repl-backlog-size 16384
$master config set repl-rdb-channel $rdbchannel
$master config set client-output-buffer-limit "replica 0 0 0" $master config set client-output-buffer-limit "replica 0 0 0"
# Executing 'debug digest' on master which has many keys costs much time # Executing 'debug digest' on master which has many keys costs much time
@ -105,12 +138,16 @@ start_server {} {
# with master. # with master.
$master config set repl-timeout 1000 $master config set repl-timeout 1000
$replica1 config set repl-timeout 1000 $replica1 config set repl-timeout 1000
$replica1 config set repl-rdb-channel $rdbchannel
$replica1 config set client-output-buffer-limit "replica 1024 0 0"
$replica2 config set repl-timeout 1000 $replica2 config set repl-timeout 1000
$replica2 config set client-output-buffer-limit "replica 1024 0 0"
$replica2 config set repl-rdb-channel $rdbchannel
$replica1 replicaof $master_host $master_port $replica1 replicaof $master_host $master_port
wait_for_sync $replica1 wait_for_sync $replica1
test {Replication backlog size can outgrow the backlog limit config} { test "Replication backlog size can outgrow the backlog limit config rdbchannel=$rdbchannel" {
# Generating RDB will take 1000 seconds # Generating RDB will take 1000 seconds
$master config set rdb-key-save-delay 1000000 $master config set rdb-key-save-delay 1000000
populate 1000 master 10000 populate 1000 master 10000
@ -124,7 +161,7 @@ start_server {} {
} }
# Replication actual backlog grow more than backlog setting since # Replication actual backlog grow more than backlog setting since
# the slow replica2 kept replication buffer. # the slow replica2 kept replication buffer.
populate 10000 master 10000 populate 20000 master 10000
assert {[s repl_backlog_histlen] > [expr 10000*10000]} assert {[s repl_backlog_histlen] > [expr 10000*10000]}
} }
@ -135,7 +172,7 @@ start_server {} {
fail "Replica offset didn't catch up with the master after too long time" fail "Replica offset didn't catch up with the master after too long time"
} }
test {Replica could use replication buffer (beyond backlog config) for partial resynchronization} { test "Replica could use replication buffer (beyond backlog config) for partial resynchronization rdbchannel=$rdbchannel" {
# replica1 disconnects with master # replica1 disconnects with master
$replica1 replicaof [srv -1 host] [srv -1 port] $replica1 replicaof [srv -1 host] [srv -1 port]
# Write a mass of data that exceeds repl-backlog-size # Write a mass of data that exceeds repl-backlog-size
@ -155,7 +192,7 @@ start_server {} {
assert_equal [$master debug digest] [$replica1 debug digest] assert_equal [$master debug digest] [$replica1 debug digest]
} }
test {Replication backlog memory will become smaller if disconnecting with replica} { test "Replication backlog memory will become smaller if disconnecting with replica rdbchannel=$rdbchannel" {
assert {[s repl_backlog_histlen] > [expr 2*10000*10000]} assert {[s repl_backlog_histlen] > [expr 2*10000*10000]}
assert_equal [s connected_slaves] {2} assert_equal [s connected_slaves] {2}
@ -165,8 +202,11 @@ start_server {} {
r set key [string repeat A [expr 64*1024]] r set key [string repeat A [expr 64*1024]]
# master will close replica2's connection since replica2's output # master will close replica2's connection since replica2's output
# buffer limit is reached, so there only is replica1. # buffer limit is reached, so there only is replica1.
# In case of rdbchannel=yes, main channel will be disconnected only.
wait_for_condition 100 100 { wait_for_condition 100 100 {
[s connected_slaves] eq {1} [s connected_slaves] eq {1} ||
([s connected_slaves] eq {2} &&
[string match {*slave*state=wait_bgsave*} [$master info]])
} else { } else {
fail "master didn't disconnect with replica2" fail "master didn't disconnect with replica2"
} }
@ -185,15 +225,19 @@ start_server {} {
} }
} }
} }
}
test {Partial resynchronization is successful even client-output-buffer-limit is less than repl-backlog-size} { foreach rdbchannel {"yes" "no"} {
test "Partial resynchronization is successful even client-output-buffer-limit is less than repl-backlog-size rdbchannel=$rdbchannel" {
start_server {tags {"repl external:skip"}} { start_server {tags {"repl external:skip"}} {
start_server {} { start_server {} {
r config set save "" r config set save ""
r config set repl-backlog-size 100mb r config set repl-backlog-size 100mb
r config set client-output-buffer-limit "replica 512k 0 0" r config set client-output-buffer-limit "replica 512k 0 0"
r config set repl-rdb-channel $rdbchannel
set replica [srv -1 client] set replica [srv -1 client]
$replica config set repl-rdb-channel $rdbchannel
$replica replicaof [srv 0 host] [srv 0 port] $replica replicaof [srv 0 host] [srv 0 port]
wait_for_sync $replica wait_for_sync $replica
@ -231,7 +275,7 @@ test {Partial resynchronization is successful even client-output-buffer-limit is
} }
# This test was added to make sure big keys added to the backlog do not trigger psync loop. # This test was added to make sure big keys added to the backlog do not trigger psync loop.
test {Replica client-output-buffer size is limited to backlog_limit/16 when no replication data is pending} { test "Replica client-output-buffer size is limited to backlog_limit/16 when no replication data is pending rdbchannel=$rdbchannel" {
proc client_field {r type f} { proc client_field {r type f} {
set client [$r client list type $type] set client [$r client list type $type]
if {![regexp $f=(\[a-zA-Z0-9-\]+) $client - res]} { if {![regexp $f=(\[a-zA-Z0-9-\]+) $client - res]} {
@ -252,6 +296,8 @@ test {Replica client-output-buffer size is limited to backlog_limit/16 when no r
$master config set repl-backlog-size 16384 $master config set repl-backlog-size 16384
$master config set client-output-buffer-limit "replica 32768 32768 60" $master config set client-output-buffer-limit "replica 32768 32768 60"
$master config set repl-rdb-channel $rdbchannel
$replica config set repl-rdb-channel $rdbchannel
# Key has has to be larger than replica client-output-buffer limit. # Key has has to be larger than replica client-output-buffer limit.
set keysize [expr 256*1024] set keysize [expr 256*1024]
@ -304,4 +350,5 @@ test {Replica client-output-buffer size is limited to backlog_limit/16 when no r
} }
} }
} }
}

View File

@ -1,3 +1,16 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
# Creates a master-slave pair and breaks the link continuously to force # Creates a master-slave pair and breaks the link continuously to force
# partial resyncs attempts, all this while flooding the master with # partial resyncs attempts, all this while flooding the master with
# write queries. # write queries.
@ -8,7 +21,7 @@
# If reconnect is > 0, the test actually try to break the connection and # If reconnect is > 0, the test actually try to break the connection and
# reconnect with the master, otherwise just the initial synchronization is # reconnect with the master, otherwise just the initial synchronization is
# checked for consistency. # checked for consistency.
proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reconnect} { proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reconnect rdbchannel} {
start_server {tags {"repl"} overrides {save {}}} { start_server {tags {"repl"} overrides {save {}}} {
start_server {overrides {save {}}} { start_server {overrides {save {}}} {
@ -21,7 +34,9 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reco
$master config set repl-backlog-ttl $backlog_ttl $master config set repl-backlog-ttl $backlog_ttl
$master config set repl-diskless-sync $mdl $master config set repl-diskless-sync $mdl
$master config set repl-diskless-sync-delay 1 $master config set repl-diskless-sync-delay 1
$master config set repl-rdb-channel $rdbchannel
$slave config set repl-diskless-load $sdl $slave config set repl-diskless-load $sdl
$slave config set repl-rdb-channel $rdbchannel
set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000] set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000]
set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000] set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000]
@ -46,7 +61,7 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reco
} }
} }
test "Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect)" { test "Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect, rdbchannel: $rdbchannel)" {
# Now while the clients are writing data, break the maste-slave # Now while the clients are writing data, break the maste-slave
# link multiple times. # link multiple times.
if ($reconnect) { if ($reconnect) {
@ -120,24 +135,31 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reco
tags {"external:skip"} { tags {"external:skip"} {
foreach mdl {no yes} { foreach mdl {no yes} {
foreach sdl {disabled swapdb} { foreach sdl {disabled swapdb} {
test_psync {no reconnection, just sync} 6 1000000 3600 0 { foreach rdbchannel {yes no} {
} $mdl $sdl 0 if {$rdbchannel == "yes" && $mdl == "no"} {
# rdbchannel replication requires repl-diskless-sync enabled
continue
}
test_psync {ok psync} 6 100000000 3600 0 { test_psync {no reconnection, just sync} 6 1000000 3600 0 {
assert {[s -1 sync_partial_ok] > 0} } $mdl $sdl 0 $rdbchannel
} $mdl $sdl 1
test_psync {no backlog} 6 100 3600 0.5 { test_psync {ok psync} 6 100000000 3600 0 {
assert {[s -1 sync_partial_err] > 0} assert {[s -1 sync_partial_ok] > 0}
} $mdl $sdl 1 } $mdl $sdl 1 $rdbchannel
test_psync {ok after delay} 3 100000000 3600 3 { test_psync {no backlog} 6 100 3600 0.5 {
assert {[s -1 sync_partial_ok] > 0} assert {[s -1 sync_partial_err] > 0}
} $mdl $sdl 1 } $mdl $sdl 1 $rdbchannel
test_psync {backlog expired} 3 100000000 1 3 { test_psync {ok after delay} 3 100000000 3600 3 {
assert {[s -1 sync_partial_err] > 0} assert {[s -1 sync_partial_ok] > 0}
} $mdl $sdl 1 } $mdl $sdl 1 $rdbchannel
test_psync {backlog expired} 3 100000000 1 3 {
assert {[s -1 sync_partial_err] > 0}
} $mdl $sdl 1 $rdbchannel
}
} }
} }
} }

View File

@ -0,0 +1,795 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
# Returns either main or rdbchannel client id
# Assumes there is one replica with two channels
proc get_replica_client_id {master rdbchannel} {
set input [$master client list type replica]
foreach line [split $input "\n"] {
if {[regexp {id=(\d+).*flags=(\S+)} $line match id flags]} {
if {$rdbchannel == "yes"} {
# rdbchannel will have C flag
if {[string match *C* $flags]} {
return $id
}
} else {
return $id
}
}
}
error "Replica not found"
}
start_server {tags {"repl external:skip"}} {
set replica1 [srv 0 client]
start_server {} {
set replica2 [srv 0 client]
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
$master config set repl-diskless-sync yes
$master config set repl-rdb-channel yes
populate 1000 master 10
test "Test replication with multiple replicas (rdbchannel enabled on both)" {
$replica1 config set repl-rdb-channel yes
$replica1 replicaof $master_host $master_port
$replica2 config set repl-rdb-channel yes
$replica2 replicaof $master_host $master_port
wait_replica_online $master 0
wait_replica_online $master 1
$master set x 1
# Wait until replicas catch master
wait_for_ofs_sync $master $replica1
wait_for_ofs_sync $master $replica2
# Verify db's are identical
assert_morethan [$master dbsize] 0
assert_equal [$master get x] 1
assert_equal [$master debug digest] [$replica1 debug digest]
assert_equal [$master debug digest] [$replica2 debug digest]
}
test "Test replication with multiple replicas (rdbchannel enabled on one of them)" {
# Allow both replicas to ask for sync
$master config set repl-diskless-sync-delay 5
$replica1 replicaof no one
$replica2 replicaof no one
$replica1 config set repl-rdb-channel yes
$replica2 config set repl-rdb-channel no
set prev_forks [s 0 total_forks]
$master set x 2
# There will be two forks subsequently, one for rdbchannel
# replica another for the replica without rdbchannel config.
$replica1 replicaof $master_host $master_port
$replica2 replicaof $master_host $master_port
set res [wait_for_log_messages 0 {"*Starting BGSAVE* replicas sockets (rdb-channel)*"} 0 2000 10]
set loglines [lindex $res 1]
wait_for_log_messages 0 {"*Starting BGSAVE* replicas sockets*"} $loglines 2000 10
wait_replica_online $master 0 100 100
wait_replica_online $master 1 100 100
# Verify two new forks.
assert_equal [s 0 total_forks] [expr $prev_forks + 2]
wait_for_ofs_sync $master $replica1
wait_for_ofs_sync $master $replica2
# Verify db's are identical
assert_equal [$replica1 get x] 2
assert_equal [$replica2 get x] 2
assert_equal [$master debug digest] [$replica1 debug digest]
assert_equal [$master debug digest] [$replica2 debug digest]
}
test "Test rdbchannel is not used if repl-diskless-sync config is disabled on master" {
$replica1 replicaof no one
$replica2 replicaof no one
$master config set repl-diskless-sync-delay 0
$master config set repl-diskless-sync no
$master set x 3
$replica1 replicaof $master_host $master_port
# Verify log message does not mention rdbchannel
wait_for_log_messages 0 {"*Starting BGSAVE for SYNC with target: disk*"} 0 2000 1
wait_replica_online $master 0
wait_for_ofs_sync $master $replica1
# Verify db's are identical
assert_equal [$replica1 get x] 3
assert_equal [$master debug digest] [$replica1 debug digest]
}
}
}
}
start_server {tags {"repl external:skip"}} {
set replica [srv 0 client]
set replica_pid [srv 0 pid]
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
$master config set repl-rdb-channel yes
$replica config set repl-rdb-channel yes
# Reuse this test to verify large key delivery
$master config set rdbcompression no
$master config set rdb-key-save-delay 3000
populate 1000 prefix1 10
populate 5 prefix2 3000000
populate 5 prefix3 2000000
populate 5 prefix4 1000000
# On master info output, we should see state transition in this order:
# 1. wait_bgsave: Replica receives psync error (+RDBCHANNELSYNC)
# 2. send_bulk_and_stream: Replica opens rdbchannel and delivery started
# 3. online: Sync is completed
test "Test replica state should start with wait_bgsave" {
$replica config set key-load-delay 100000
# Pause replica before opening rdb channel conn
$replica debug repl-pause before-rdb-channel
$replica replicaof $master_host $master_port
wait_for_condition 50 200 {
[s 0 connected_slaves] == 1 &&
[string match "*wait_bgsave*" [s 0 slave0]]
} else {
fail "replica failed"
}
}
test "Test replica state advances to send_bulk_and_stream when rdbchannel connects" {
$master set x 1
resume_process $replica_pid
wait_for_condition 50 200 {
[s 0 connected_slaves] == 1 &&
[s 0 rdb_bgsave_in_progress] == 1 &&
[string match "*send_bulk_and_stream*" [s 0 slave0]]
} else {
fail "replica failed"
}
}
test "Test replica rdbchannel client has SC flag on client list output" {
set input [$master client list type replica]
# There will two replicas, second one should be rdbchannel
set trimmed_input [string trimright $input]
set lines [split $trimmed_input "\n"]
if {[llength $lines] < 2} {
error "There is no second line in the input: $input"
}
set second_line [lindex $lines 1]
# Check if 'flags=SC' exists in the second line
if {![regexp {flags=SC} $second_line]} {
error "Flags are not 'SC' in the second line: $second_line"
}
}
test "Test replica state advances to online when fullsync is completed" {
# Speed up loading
$replica config set key-load-delay 0
wait_replica_online $master 0 100 1000
wait_for_ofs_sync $master $replica
wait_for_condition 50 200 {
[s 0 rdb_bgsave_in_progress] == 0 &&
[s 0 connected_slaves] == 1 &&
[string match "*online*" [s 0 slave0]]
} else {
fail "replica failed"
}
wait_replica_online $master 0 100 1000
wait_for_ofs_sync $master $replica
# Verify db's are identical
assert_morethan [$master dbsize] 0
assert_equal [$master debug digest] [$replica debug digest]
}
}
}
start_server {tags {"repl external:skip"}} {
set replica [srv 0 client]
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
$master config set repl-rdb-channel yes
$replica config set repl-rdb-channel yes
test "Test master memory does not increase during replication" {
# Put some delay to rdb generation. If master doesn't forward
# incoming traffic to replica, master's replication buffer will grow
$master config set rdb-key-save-delay 200
$master config set repl-backlog-size 5mb
populate 10000 master 10000
# Start write traffic
set load_handle [start_write_load $master_host $master_port 100 "key1"]
set prev_used [s 0 used_memory]
$replica replicaof $master_host $master_port
set backlog_size [lindex [$master config get repl-backlog-size] 1]
# Verify used_memory stays low
set max_retry 1000
set prev_buf_size 0
while {$max_retry} {
assert_lessthan [expr [s 0 used_memory] - $prev_used] 20000000
assert_lessthan_equal [s 0 mem_total_replication_buffers] [expr {$backlog_size + 1000000}]
# Check replica state
if {[string match *slave0*state=online* [$master info]] &&
[s -1 master_link_status] == "up"} {
break
} else {
incr max_retry -1
after 10
}
}
if {$max_retry == 0} {
error "assertion:Replica not in sync after 10 seconds"
}
stop_write_load $load_handle
}
}
}
start_server {tags {"repl external:skip"}} {
set replica [srv 0 client]
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
$master config set repl-rdb-channel yes
$replica config set repl-rdb-channel yes
test "Test replication stream buffer becomes full on replica" {
# For replication stream accumulation, replica inherits slave output
# buffer limit as the size limit. In this test, we create traffic to
# fill the buffer fully. Once the limit is reached, accumulation
# will stop. This is not a failure scenario though. From that point,
# further accumulation may occur on master side. Replication should
# be completed successfully.
# Create some artificial delay for rdb delivery and load. We'll
# generate some traffic to fill the replication buffer.
$master config set rdb-key-save-delay 1000
$replica config set key-load-delay 1000
$replica config set client-output-buffer-limit "replica 64kb 64kb 0"
populate 2000 master 1
set prev_sync_full [s 0 sync_full]
$replica replicaof $master_host $master_port
# Wait for replica to establish psync using main channel
wait_for_condition 500 1000 {
[string match "*state=send_bulk_and_stream*" [s 0 slave0]]
} else {
fail "replica didn't start sync"
}
# Create some traffic on replication stream
populate 100 master 100000
# Wait for replica's buffer limit reached
wait_for_log_messages -1 {"*Replication buffer limit has been reached*"} 0 1000 10
# Speed up loading
$replica config set key-load-delay 0
# Wait until sync is successful
wait_for_condition 200 200 {
[status $master master_repl_offset] eq [status $replica master_repl_offset] &&
[status $master master_repl_offset] eq [status $replica slave_repl_offset]
} else {
fail "replica offsets didn't match in time"
}
# Verify sync was not interrupted.
assert_equal [s 0 sync_full] [expr $prev_sync_full + 1]
# Verify db's are identical
assert_morethan [$master dbsize] 0
assert_equal [$master debug digest] [$replica debug digest]
}
test "Test replication stream buffer config replica-full-sync-buffer-limit" {
# By default, replica inherits client-output-buffer-limit of replica
# to limit accumulated repl data during rdbchannel sync.
# replica-full-sync-buffer-limit should override it if it is set.
$replica replicaof no one
# Create some artificial delay for rdb delivery and load. We'll
# generate some traffic to fill the replication buffer.
$master config set rdb-key-save-delay 1000
$replica config set key-load-delay 1000
$replica config set client-output-buffer-limit "replica 1024 1024 0"
$replica config set replica-full-sync-buffer-limit 20mb
populate 2000 master 1
$replica replicaof $master_host $master_port
# Wait until replication starts
wait_for_condition 500 1000 {
[string match "*state=send_bulk_and_stream*" [s 0 slave0]]
} else {
fail "replica didn't start sync"
}
# Create some traffic on replication stream
populate 100 master 100000
# Make sure config is used, we accumulated more than
# client-output-buffer-limit
assert_morethan [s -1 replica_full_sync_buffer_size] 1024
}
}
}
start_server {tags {"repl external:skip"}} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
set master_pid [srv 0 pid]
set loglines [count_log_lines 0]
$master config set repl-diskless-sync yes
$master config set repl-rdb-channel yes
$master config set repl-backlog-size 1mb
$master config set client-output-buffer-limit "replica 100k 0 0"
$master config set loglevel debug
$master config set repl-diskless-sync-delay 3
start_server {} {
set replica [srv 0 client]
set replica_pid [srv 0 pid]
$replica config set repl-rdb-channel yes
$replica config set loglevel debug
$replica config set repl-timeout 10
$replica config set key-load-delay 10000
$replica config set loading-process-events-interval-bytes 1024
test "Test master disconnects replica when output buffer limit is reached" {
populate 20000 master 100 -1
$replica replicaof $master_host $master_port
wait_for_condition 50 200 {
[s 0 loading] == 1
} else {
fail "[s 0 loading] sdsdad"
}
# Generate some traffic for backlog ~2mb
populate 20 master 1000000 -1
set res [wait_for_log_messages -1 {"*Client * closed * for overcoming of output buffer limits.*"} $loglines 1000 10]
set loglines [lindex $res 1]
$replica config set key-load-delay 0
# Wait until replica loads RDB
wait_for_log_messages 0 {"*Done loading RDB*"} 0 1000 10
}
test "Test replication recovers after output buffer failures" {
# Verify system is operational
$master set x 1
# Wait until replica catches up
wait_replica_online $master 0 1000 100
wait_for_ofs_sync $master $replica
# Verify db's are identical
assert_morethan [$master dbsize] 0
assert_equal [$replica get x] 1
assert_equal [$master debug digest] [$replica debug digest]
}
}
}
start_server {tags {"repl external:skip"}} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
$master config set repl-diskless-sync yes
$master config set repl-rdb-channel yes
$master config set rdb-key-save-delay 300
$master config set client-output-buffer-limit "replica 0 0 0"
$master config set repl-diskless-sync-delay 5
$master config set loglevel debug
populate 10000 master 1
start_server {} {
set replica1 [srv 0 client]
$replica1 config set repl-rdb-channel yes
$replica1 config set loglevel debug
start_server {} {
set replica2 [srv 0 client]
$replica2 config set repl-rdb-channel yes
$replica2 config set loglevel debug
set load_handle [start_write_load $master_host $master_port 100 "key"]
test "Test master continues RDB delivery if not all replicas are dropped" {
$replica1 replicaof $master_host $master_port
$replica2 replicaof $master_host $master_port
wait_for_condition 50 200 {
[s -2 rdb_bgsave_in_progress] == 1
} else {
fail "Sync did not start"
}
# Wait for both replicas main conns to establish psync
wait_for_condition 500 100 {
[s -2 connected_slaves] == 2
} else {
fail "Replicas didn't establish psync:
sync_partial_ok: [s -2 sync_partial_ok]"
}
# kill one of the replicas
catch {$replica1 shutdown nosave}
# Wait until replica completes full sync
# Verify there is no other full sync attempt
wait_for_condition 50 1000 {
[s 0 master_link_status] == "up" &&
[s -2 sync_full] == 2 &&
[s -2 connected_slaves] == 1
} else {
fail "Sync session did not continue
master_link_status: [s 0 master_link_status]
sync_full:[s -2 sync_full]
connected_slaves: [s -2 connected_slaves]"
}
}
test "Test master aborts rdb delivery if all replicas are dropped" {
$replica2 replicaof no one
# Start replication
$replica2 replicaof $master_host $master_port
wait_for_condition 50 1000 {
[s -2 rdb_bgsave_in_progress] == 1
} else {
fail "Sync did not start"
}
set loglines [count_log_lines -2]
# kill replica
catch {$replica2 shutdown nosave}
# Verify master aborts rdb save
wait_for_condition 50 1000 {
[s -2 rdb_bgsave_in_progress] == 0 &&
[s -2 connected_slaves] == 0
} else {
fail "Master should abort the sync
rdb_bgsave_in_progress:[s -2 rdb_bgsave_in_progress]
connected_slaves: [s -2 connected_slaves]"
}
wait_for_log_messages -2 {"*Background transfer error*"} $loglines 1000 50
}
stop_write_load $load_handle
}
}
}
start_server {tags {"repl external:skip"}} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
$master config set repl-diskless-sync yes
$master config set repl-rdb-channel yes
$master config set loglevel debug
$master config set rdb-key-save-delay 1000
populate 3000 prefix1 1
populate 100 prefix2 100000
start_server {} {
set replica [srv 0 client]
set replica_pid [srv 0 pid]
$replica config set repl-rdb-channel yes
$replica config set loglevel debug
$replica config set repl-timeout 10
set load_handle [start_write_load $master_host $master_port 100 "key"]
test "Test replica recovers when rdb channel connection is killed" {
$replica replicaof $master_host $master_port
# Wait for sync session to start
wait_for_condition 500 200 {
[string match "*state=send_bulk_and_stream*" [s -1 slave0]] &&
[s -1 rdb_bgsave_in_progress] eq 1
} else {
fail "replica didn't start sync session in time"
}
set loglines [count_log_lines -1]
# Kill rdb channel client
set id [get_replica_client_id $master yes]
$master client kill id $id
wait_for_log_messages -1 {"*Background transfer error*"} $loglines 1000 10
# Verify master rejects main-ch-client-id after connection is killed
assert_error {*Unrecognized*} {$master replconf main-ch-client-id $id}
# Replica should retry
wait_for_condition 500 200 {
[string match "*state=send_bulk_and_stream*" [s -1 slave0]] &&
[s -1 rdb_bgsave_in_progress] eq 1
} else {
fail "replica didn't retry after connection close"
}
}
test "Test replica recovers when main channel connection is killed" {
set loglines [count_log_lines -1]
# Kill main channel client
set id [get_replica_client_id $master yes]
$master client kill id $id
wait_for_log_messages -1 {"*Background transfer error*"} $loglines 1000 20
# Replica should retry
wait_for_condition 500 2000 {
[string match "*state=send_bulk_and_stream*" [s -1 slave0]] &&
[s -1 rdb_bgsave_in_progress] eq 1
} else {
fail "replica didn't retry after connection close"
}
}
stop_write_load $load_handle
test "Test replica recovers connection failures" {
# Wait until replica catches up
wait_replica_online $master 0 1000 100
wait_for_ofs_sync $master $replica
# Verify db's are identical
assert_morethan [$master dbsize] 0
assert_equal [$master debug digest] [$replica debug digest]
}
}
}
start_server {tags {"repl external:skip"}} {
set replica [srv 0 client]
set replica_pid [srv 0 pid]
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
test "Test master connection drops while streaming repl buffer into the db" {
# Just after replica loads RDB, it will stream repl buffer into the
# db. During streaming, we kill the master connection. Replica
# will abort streaming and then try another psync with master.
$master config set rdb-key-save-delay 1000
$master config set repl-rdb-channel yes
$master config set repl-diskless-sync yes
$replica config set repl-rdb-channel yes
$replica config set loading-process-events-interval-bytes 1024
# Populate db and start write traffic
populate 2000 master 1000
set load_handle [start_write_load $master_host $master_port 100 "key1"]
# Replica will pause in the loop of repl buffer streaming
$replica debug repl-pause on-streaming-repl-buf
$replica replicaof $master_host $master_port
# Check if repl stream accumulation is started.
wait_for_condition 50 1000 {
[s -1 replica_full_sync_buffer_size] > 0
} else {
fail "repl stream accumulation not started"
}
# Wait until replica starts streaming repl buffer
wait_for_log_messages -1 {"*Starting to stream replication buffer*"} 0 2000 10
stop_write_load $load_handle
$master config set rdb-key-save-delay 0
# Kill master connection and resume the process
$replica deferred 1
$replica client kill type master
$replica debug repl-pause clear
resume_process $replica_pid
$replica read
$replica read
$replica deferred 0
wait_for_log_messages -1 {"*Master client was freed while streaming*"} 0 500 10
# Quick check for stats test coverage
assert_morethan_equal [s -1 replica_full_sync_buffer_peak] [s -1 replica_full_sync_buffer_size]
# Wait until replica recovers and verify db's are identical
wait_replica_online $master 0 1000 10
wait_for_ofs_sync $master $replica
assert_morethan [$master dbsize] 0
assert_equal [$master debug digest] [$replica debug digest]
}
}
}
start_server {tags {"repl external:skip"}} {
set replica [srv 0 client]
set replica_pid [srv 0 pid]
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
test "Test main channel connection drops while loading rdb (disk based)" {
# While loading rdb, we kill main channel connection.
# We expect replica to complete loading RDB and then try psync
# with the master.
$master config set repl-rdb-channel yes
$replica config set repl-rdb-channel yes
$replica config set repl-diskless-load disabled
$replica config set key-load-delay 10000
$replica config set loading-process-events-interval-bytes 1024
# Populate db and start write traffic
populate 10000 master 100
$replica replicaof $master_host $master_port
# Wait until replica starts loading
wait_for_condition 50 200 {
[s -1 loading] == 1
} else {
fail "replica did not start loading"
}
# Kill replica connections
$master client kill type replica
$master set x 1
# At this point, we expect replica to complete loading RDB. Then,
# it will try psync with master.
wait_for_log_messages -1 {"*Aborting rdb channel sync while loading the RDB*"} 0 2000 10
wait_for_log_messages -1 {"*After loading RDB, replica will try psync with master*"} 0 2000 10
# Speed up loading
$replica config set key-load-delay 0
# Wait until replica becomes online
wait_replica_online $master 0 100 100
# Verify there is another successful psync and no other full sync
wait_for_condition 50 200 {
[s 0 sync_full] == 1 &&
[s 0 sync_partial_ok] == 1
} else {
fail "psync was not successful [s 0 sync_full] [s 0 sync_partial_ok]"
}
# Verify db's are identical after recovery
wait_for_ofs_sync $master $replica
assert_morethan [$master dbsize] 0
assert_equal [$master debug digest] [$replica debug digest]
}
}
}
start_server {tags {"repl external:skip"}} {
set replica [srv 0 client]
set replica_pid [srv 0 pid]
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
test "Test main channel connection drops while loading rdb (diskless)" {
# While loading rdb, kill both main and rdbchannel connections.
# We expect replica to abort sync and later retry again.
$master config set repl-rdb-channel yes
$replica config set repl-rdb-channel yes
$replica config set repl-diskless-load swapdb
$replica config set key-load-delay 10000
$replica config set loading-process-events-interval-bytes 1024
# Populate db and start write traffic
populate 10000 master 100
$replica replicaof $master_host $master_port
# Wait until replica starts loading
wait_for_condition 50 200 {
[s -1 loading] == 1
} else {
fail "replica did not start loading"
}
# Kill replica connections
$master client kill type replica
$master set x 1
# At this point, we expect replica to abort loading RDB.
wait_for_log_messages -1 {"*Aborting rdb channel sync while loading the RDB*"} 0 2000 10
wait_for_log_messages -1 {"*Failed trying to load the MASTER synchronization DB from socket*"} 0 2000 10
# Speed up loading
$replica config set key-load-delay 0
stop_write_load $load_handle
# Wait until replica recovers and becomes online
wait_replica_online $master 0 100 100
# Verify replica attempts another full sync
wait_for_condition 50 200 {
[s 0 sync_full] == 2 &&
[s 0 sync_partial_ok] == 0
} else {
fail "sync was not successful [s 0 sync_full] [s 0 sync_partial_ok]"
}
# Verify db's are identical after recovery
wait_for_ofs_sync $master $replica
assert_morethan [$master dbsize] 0
assert_equal [$master debug digest] [$replica debug digest]
}
}
}

View File

@ -1,3 +1,16 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
proc log_file_matches {log pattern} { proc log_file_matches {log pattern} {
set fp [open $log r] set fp [open $log r]
set content [read $fp] set content [read $fp]
@ -303,7 +316,7 @@ start_server {tags {"repl external:skip"}} {
} }
} }
foreach mdl {no yes} { foreach mdl {no yes} rdbchannel {no yes} {
foreach sdl {disabled swapdb} { foreach sdl {disabled swapdb} {
start_server {tags {"repl external:skip"} overrides {save {}}} { start_server {tags {"repl external:skip"} overrides {save {}}} {
set master [srv 0 client] set master [srv 0 client]
@ -319,7 +332,13 @@ foreach mdl {no yes} {
lappend slaves [srv 0 client] lappend slaves [srv 0 client]
start_server {overrides {save {}}} { start_server {overrides {save {}}} {
lappend slaves [srv 0 client] lappend slaves [srv 0 client]
test "Connect multiple replicas at the same time (issue #141), master diskless=$mdl, replica diskless=$sdl" { test "Connect multiple replicas at the same time (issue #141), master diskless=$mdl, replica diskless=$sdl, rdbchannel=$rdbchannel" {
$master config set repl-rdb-channel $rdbchannel
[lindex $slaves 0] config set repl-rdb-channel $rdbchannel
[lindex $slaves 1] config set repl-rdb-channel $rdbchannel
[lindex $slaves 2] config set repl-rdb-channel $rdbchannel
# start load handles only inside the test, so that the test can be skipped # start load handles only inside the test, so that the test can be skipped
set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000000] set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000000]
set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000000] set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000000]
@ -438,7 +457,7 @@ start_server {tags {"repl external:skip"} overrides {save {}}} {
} }
# Diskless load swapdb when NOT async_loading (different master replid) # Diskless load swapdb when NOT async_loading (different master replid)
foreach testType {Successful Aborted} { foreach testType {Successful Aborted} rdbchannel {yes no} {
start_server {tags {"repl external:skip"}} { start_server {tags {"repl external:skip"}} {
set replica [srv 0 client] set replica [srv 0 client]
set replica_host [srv 0 host] set replica_host [srv 0 host]
@ -453,6 +472,7 @@ foreach testType {Successful Aborted} {
$master config set repl-diskless-sync yes $master config set repl-diskless-sync yes
$master config set repl-diskless-sync-delay 0 $master config set repl-diskless-sync-delay 0
$master config set save "" $master config set save ""
$master config set repl-rdb-channel $rdbchannel
$replica config set repl-diskless-load swapdb $replica config set repl-diskless-load swapdb
$replica config set save "" $replica config set save ""
@ -474,7 +494,7 @@ foreach testType {Successful Aborted} {
# Start the replication process # Start the replication process
$replica replicaof $master_host $master_port $replica replicaof $master_host $master_port
test {Diskless load swapdb (different replid): replica enter loading} { test "Diskless load swapdb (different replid): replica enter loading rdbchannel=$rdbchannel" {
# Wait for the replica to start reading the rdb # Wait for the replica to start reading the rdb
wait_for_condition 100 100 { wait_for_condition 100 100 {
[s -1 loading] eq 1 [s -1 loading] eq 1
@ -498,7 +518,7 @@ foreach testType {Successful Aborted} {
fail "Replica didn't disconnect" fail "Replica didn't disconnect"
} }
test {Diskless load swapdb (different replid): old database is exposed after replication fails} { test "Diskless load swapdb (different replid): old database is exposed after replication fails rdbchannel=$rdbchannel" {
# Ensure we see old values from replica # Ensure we see old values from replica
assert_equal [$replica get mykey] "myvalue" assert_equal [$replica get mykey] "myvalue"
@ -590,8 +610,8 @@ foreach testType {Successful Aborted} {
if {$testType == "Aborted"} { if {$testType == "Aborted"} {
# Set master with a slow rdb generation, so that we can easily intercept loading # Set master with a slow rdb generation, so that we can easily intercept loading
# 10ms per key, with 2000 keys is 20 seconds # 20ms per key, with 2000 keys is 40 seconds
$master config set rdb-key-save-delay 10000 $master config set rdb-key-save-delay 20000
} }
# Force the replica to try another full sync (this time it will have matching master replid) # Force the replica to try another full sync (this time it will have matching master replid)
@ -862,6 +882,7 @@ start_server {tags {"repl external:skip"} overrides {save ""}} {
# we also need the replica to process requests during transfer (which it does only once in 2mb) # we also need the replica to process requests during transfer (which it does only once in 2mb)
$master debug populate 20000 test 10000 $master debug populate 20000 test 10000
$master config set rdbcompression no $master config set rdbcompression no
$master config set repl-rdb-channel no
# If running on Linux, we also measure utime/stime to detect possible I/O handling issues # If running on Linux, we also measure utime/stime to detect possible I/O handling issues
set os [catch {exec uname}] set os [catch {exec uname}]
set measure_time [expr {$os == "Linux"} ? 1 : 0] set measure_time [expr {$os == "Linux"} ? 1 : 0]
@ -1009,6 +1030,7 @@ test "diskless replication child being killed is collected" {
set master_pid [srv 0 pid] set master_pid [srv 0 pid]
$master config set repl-diskless-sync yes $master config set repl-diskless-sync yes
$master config set repl-diskless-sync-delay 0 $master config set repl-diskless-sync-delay 0
$master config set repl-rdb-channel no
# put enough data in the db that the rdb file will be bigger than the socket buffers # put enough data in the db that the rdb file will be bigger than the socket buffers
$master debug populate 20000 test 10000 $master debug populate 20000 test 10000
$master config set rdbcompression no $master config set rdbcompression no
@ -1269,7 +1291,8 @@ start_server {tags {"repl external:skip"}} {
r slaveof $master2_host $master2_port r slaveof $master2_host $master2_port
wait_for_condition 50 100 { wait_for_condition 50 100 {
([s -2 rdb_bgsave_in_progress] == 1) && ([s -2 rdb_bgsave_in_progress] == 1) &&
([string match "*wait_bgsave*" [s -2 slave0]]) ([string match "*wait_bgsave*" [s -2 slave0]] ||
[string match "*send_bulk_and_stream*" [s -2 slave0]])
} else { } else {
fail "full sync didn't start" fail "full sync didn't start"
} }

View File

@ -156,6 +156,11 @@ test "Shutting down master waits for replica then fails" {
set rd2 [redis_deferring_client -1] set rd2 [redis_deferring_client -1]
$rd1 shutdown $rd1 shutdown
$rd2 shutdown $rd2 shutdown
wait_for_condition 100 10 {
[llength [regexp -all -inline {cmd=shutdown} [$master client list]]] eq 2
} else {
fail "shutdown did not arrive"
}
set info_clients [$master info clients] set info_clients [$master info clients]
assert_match "*connected_clients:3*" $info_clients assert_match "*connected_clients:3*" $info_clients
assert_match "*blocked_clients:2*" $info_clients assert_match "*blocked_clients:2*" $info_clients
@ -209,6 +214,11 @@ test "Shutting down master waits for replica then aborted" {
set rd2 [redis_deferring_client -1] set rd2 [redis_deferring_client -1]
$rd1 shutdown $rd1 shutdown
$rd2 shutdown $rd2 shutdown
wait_for_condition 100 10 {
[llength [regexp -all -inline {cmd=shutdown} [$master client list]]] eq 2
} else {
fail "shutdown did not arrive"
}
set info_clients [$master info clients] set info_clients [$master info clients]
assert_match "*connected_clients:3*" $info_clients assert_match "*connected_clients:3*" $info_clients
assert_match "*blocked_clients:2*" $info_clients assert_match "*blocked_clients:2*" $info_clients

View File

@ -51,6 +51,52 @@ int set_aclcheck_key(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
return REDISMODULE_OK; return REDISMODULE_OK;
} }
/* A wrap for SET command with ACL check on the key. */
int set_aclcheck_prefixkey(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
if (argc < 4) {
return RedisModule_WrongArity(ctx);
}
int permissions;
const char *flags = RedisModule_StringPtrLen(argv[1], NULL);
if (!strcasecmp(flags, "W")) {
permissions = REDISMODULE_CMD_KEY_UPDATE;
} else if (!strcasecmp(flags, "R")) {
permissions = REDISMODULE_CMD_KEY_ACCESS;
} else if (!strcasecmp(flags, "*")) {
permissions = REDISMODULE_CMD_KEY_UPDATE | REDISMODULE_CMD_KEY_ACCESS;
} else if (!strcasecmp(flags, "~")) {
permissions = 0; /* Requires either read or write */
} else {
RedisModule_ReplyWithError(ctx, "INVALID FLAGS");
return REDISMODULE_OK;
}
/* Check that the key can be accessed */
RedisModuleString *user_name = RedisModule_GetCurrentUserName(ctx);
RedisModuleUser *user = RedisModule_GetModuleUserFromUserName(user_name);
int ret = RedisModule_ACLCheckKeyPrefixPermissions(user, argv[2], permissions);
if (ret != 0) {
RedisModule_ReplyWithError(ctx, "DENIED KEY");
RedisModule_FreeModuleUser(user);
RedisModule_FreeString(ctx, user_name);
return REDISMODULE_OK;
}
RedisModuleCallReply *rep = RedisModule_Call(ctx, "SET", "v", argv + 3, argc - 3);
if (!rep) {
RedisModule_ReplyWithError(ctx, "NULL reply returned");
} else {
RedisModule_ReplyWithCallReply(ctx, rep);
RedisModule_FreeCallReply(rep);
}
RedisModule_FreeModuleUser(user);
RedisModule_FreeString(ctx, user_name);
return REDISMODULE_OK;
}
/* A wrap for PUBLISH command with ACL check on the channel. */ /* A wrap for PUBLISH command with ACL check on the channel. */
int publish_aclcheck_channel(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { int publish_aclcheck_channel(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
if (argc != 3) { if (argc != 3) {
@ -247,6 +293,9 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
if (RedisModule_CreateCommand(ctx,"aclcheck.set.check.key", set_aclcheck_key,"write",0,0,0) == REDISMODULE_ERR) if (RedisModule_CreateCommand(ctx,"aclcheck.set.check.key", set_aclcheck_key,"write",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR; return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx,"aclcheck.set.check.prefixkey", set_aclcheck_prefixkey,"write",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx,"block.commands.outside.onload", commandBlockCheck,"write",0,0,0) == REDISMODULE_ERR) if (RedisModule_CreateCommand(ctx,"block.commands.outside.onload", commandBlockCheck,"write",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR; return REDISMODULE_ERR;

View File

@ -21,19 +21,291 @@ void segfaultCrash(RedisModuleInfoCtx *ctx, int for_crash_report) {
*p = 'x'; *p = 'x';
} }
int cmd_crash(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
UNUSED(ctx);
UNUSED(argv);
UNUSED(argc);
RedisModule_Assert(0);
return REDISMODULE_OK;
}
int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
REDISMODULE_NOT_USED(argv); REDISMODULE_NOT_USED(argv);
REDISMODULE_NOT_USED(argc); REDISMODULE_NOT_USED(argc);
if (RedisModule_Init(ctx,"infocrash",1,REDISMODULE_APIVER_1) if (RedisModule_Init(ctx,"modulecrash",1,REDISMODULE_APIVER_1)
== REDISMODULE_ERR) return REDISMODULE_ERR; == REDISMODULE_ERR) return REDISMODULE_ERR;
RedisModule_Assert(argc == 1);
if (!strcasecmp(RedisModule_StringPtrLen(argv[0], NULL), "segfault")) { if (argc >= 1) {
if (RedisModule_RegisterInfoFunc(ctx, segfaultCrash) == REDISMODULE_ERR) return REDISMODULE_ERR; if (!strcasecmp(RedisModule_StringPtrLen(argv[0], NULL), "segfault")) {
} else if(!strcasecmp(RedisModule_StringPtrLen(argv[0], NULL), "assert")) { if (RedisModule_RegisterInfoFunc(ctx, segfaultCrash) == REDISMODULE_ERR) return REDISMODULE_ERR;
if (RedisModule_RegisterInfoFunc(ctx, assertCrash) == REDISMODULE_ERR) return REDISMODULE_ERR; } else if (!strcasecmp(RedisModule_StringPtrLen(argv[0], NULL),"assert")) {
} else { if (RedisModule_RegisterInfoFunc(ctx, assertCrash) == REDISMODULE_ERR) return REDISMODULE_ERR;
return REDISMODULE_ERR; }
} }
/* Create modulecrash.xadd command which is similar to xadd command.
* It will crash in the command handler to verify we print command tokens
* when hide-user-data-from-log config is enabled */
RedisModuleCommandInfo info = {
.version = REDISMODULE_COMMAND_INFO_VERSION,
.arity = -5,
.key_specs = (RedisModuleCommandKeySpec[]){
{
.notes = "UPDATE instead of INSERT because of the optional trimming feature",
.flags = REDISMODULE_CMD_KEY_RW | REDISMODULE_CMD_KEY_UPDATE,
.begin_search_type = REDISMODULE_KSPEC_BS_INDEX,
.bs.index.pos = 1,
.find_keys_type = REDISMODULE_KSPEC_FK_RANGE,
.fk.range = {0,1,0}
},
{0}
},
.args = (RedisModuleCommandArg[]){
{
.name = "key",
.type = REDISMODULE_ARG_TYPE_KEY,
.key_spec_index = 0
},
{
.name = "nomkstream",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "NOMKSTREAM",
.since = "6.2.0",
.flags = REDISMODULE_CMD_ARG_OPTIONAL
},
{
.name = "trim",
.type = REDISMODULE_ARG_TYPE_BLOCK,
.flags = REDISMODULE_CMD_ARG_OPTIONAL,
.subargs = (RedisModuleCommandArg[]){
{
.name = "strategy",
.type = REDISMODULE_ARG_TYPE_ONEOF,
.subargs = (RedisModuleCommandArg[]){
{
.name = "maxlen",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "MAXLEN",
},
{
.name = "minid",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "MINID",
.since = "6.2.0",
},
{0}
}
},
{
.name = "operator",
.type = REDISMODULE_ARG_TYPE_ONEOF,
.flags = REDISMODULE_CMD_ARG_OPTIONAL,
.subargs = (RedisModuleCommandArg[]){
{
.name = "equal",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "="
},
{
.name = "approximately",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "~"
},
{0}
}
},
{
.name = "threshold",
.type = REDISMODULE_ARG_TYPE_STRING,
.display_text = "threshold" /* Just for coverage, doesn't have a visible effect */
},
{
.name = "count",
.type = REDISMODULE_ARG_TYPE_INTEGER,
.token = "LIMIT",
.since = "6.2.0",
.flags = REDISMODULE_CMD_ARG_OPTIONAL
},
{0}
}
},
{
.name = "id-selector",
.type = REDISMODULE_ARG_TYPE_ONEOF,
.subargs = (RedisModuleCommandArg[]){
{
.name = "auto-id",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "*"
},
{
.name = "id",
.type = REDISMODULE_ARG_TYPE_STRING,
},
{0}
}
},
{
.name = "data",
.type = REDISMODULE_ARG_TYPE_BLOCK,
.flags = REDISMODULE_CMD_ARG_MULTIPLE,
.subargs = (RedisModuleCommandArg[]){
{
.name = "field",
.type = REDISMODULE_ARG_TYPE_STRING,
},
{
.name = "value",
.type = REDISMODULE_ARG_TYPE_STRING,
},
{0}
}
},
{0}
}
};
RedisModuleCommand *cmd;
if (RedisModule_CreateCommand(ctx,"modulecrash.xadd", cmd_crash,"write deny-oom random fast",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
cmd = RedisModule_GetCommand(ctx,"modulecrash.xadd");
if (RedisModule_SetCommandInfo(cmd, &info) == REDISMODULE_ERR)
return REDISMODULE_ERR;
/* Create a subcommand: modulecrash.parent sub
* It will crash in the command handler to verify we print subcommand name
* when hide-user-data-from-log config is enabled */
RedisModuleCommandInfo subcommand_info = {
.version = REDISMODULE_COMMAND_INFO_VERSION,
.arity = -5,
.key_specs = (RedisModuleCommandKeySpec[]){
{
.flags = REDISMODULE_CMD_KEY_RW | REDISMODULE_CMD_KEY_UPDATE,
.begin_search_type = REDISMODULE_KSPEC_BS_INDEX,
.bs.index.pos = 1,
.find_keys_type = REDISMODULE_KSPEC_FK_RANGE,
.fk.range = {0,1,0}
},
{0}
},
.args = (RedisModuleCommandArg[]){
{
.name = "key",
.type = REDISMODULE_ARG_TYPE_KEY,
.key_spec_index = 0
},
{
.name = "token",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "TOKEN",
.flags = REDISMODULE_CMD_ARG_OPTIONAL
},
{
.name = "data",
.type = REDISMODULE_ARG_TYPE_BLOCK,
.subargs = (RedisModuleCommandArg[]){
{
.name = "field",
.type = REDISMODULE_ARG_TYPE_STRING,
},
{
.name = "value",
.type = REDISMODULE_ARG_TYPE_STRING,
},
{0}
}
},
{0}
}
};
if (RedisModule_CreateCommand(ctx,"modulecrash.parent",NULL,"",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
RedisModuleCommand *parent = RedisModule_GetCommand(ctx,"modulecrash.parent");
if (RedisModule_CreateSubcommand(parent,"subcmd",cmd_crash,"",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
cmd = RedisModule_GetCommand(ctx,"modulecrash.parent|subcmd");
if (RedisModule_SetCommandInfo(cmd, &subcommand_info) == REDISMODULE_ERR)
return REDISMODULE_ERR;
/* Create modulecrash.zunion command which is similar to zunion command.
* It will crash in the command handler to verify we print command tokens
* when hide-user-data-from-log config is enabled */
RedisModuleCommandInfo zunioninfo = {
.version = REDISMODULE_COMMAND_INFO_VERSION,
.arity = -5,
.key_specs = (RedisModuleCommandKeySpec[]){
{
.flags = REDISMODULE_CMD_KEY_RO,
.begin_search_type = REDISMODULE_KSPEC_BS_INDEX,
.bs.index.pos = 1,
.find_keys_type = REDISMODULE_KSPEC_FK_KEYNUM,
.fk.keynum = {0,1,1}
},
{0}
},
.args = (RedisModuleCommandArg[]){
{
.name = "numkeys",
.type = REDISMODULE_ARG_TYPE_INTEGER,
},
{
.name = "key",
.type = REDISMODULE_ARG_TYPE_KEY,
.key_spec_index = 0,
.flags = REDISMODULE_CMD_ARG_MULTIPLE
},
{
.name = "weights",
.type = REDISMODULE_ARG_TYPE_INTEGER,
.token = "WEIGHTS",
.flags = REDISMODULE_CMD_ARG_OPTIONAL | REDISMODULE_CMD_ARG_MULTIPLE
},
{
.name = "aggregate",
.type = REDISMODULE_ARG_TYPE_ONEOF,
.token = "AGGREGATE",
.flags = REDISMODULE_CMD_ARG_OPTIONAL,
.subargs = (RedisModuleCommandArg[]){
{
.name = "sum",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "sum"
},
{
.name = "min",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "min"
},
{
.name = "max",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "max"
},
{0}
}
},
{
.name = "withscores",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "WITHSCORES",
.flags = REDISMODULE_CMD_ARG_OPTIONAL
},
{0}
}
};
if (RedisModule_CreateCommand(ctx,"modulecrash.zunion", cmd_crash,"readonly",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
cmd = RedisModule_GetCommand(ctx,"modulecrash.zunion");
if (RedisModule_SetCommandInfo(cmd, &zunioninfo) == REDISMODULE_ERR)
return REDISMODULE_ERR;
return REDISMODULE_OK; return REDISMODULE_OK;
} }

View File

@ -312,3 +312,12 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
return REDISMODULE_OK; return REDISMODULE_OK;
} }
int RedisModule_OnUnload(RedisModuleCtx *ctx) {
REDISMODULE_NOT_USED(ctx);
if (datatype) {
RedisModule_Free(datatype);
datatype = NULL;
}
return REDISMODULE_OK;
}

View File

@ -161,13 +161,14 @@ size_t FragFreeEffort(RedisModuleString *key, const void *value) {
} }
int FragDefrag(RedisModuleDefragCtx *ctx, RedisModuleString *key, void **value) { int FragDefrag(RedisModuleDefragCtx *ctx, RedisModuleString *key, void **value) {
REDISMODULE_NOT_USED(key);
unsigned long i = 0; unsigned long i = 0;
int steps = 0; int steps = 0;
int dbid = RedisModule_GetDbIdFromDefragCtx(ctx); int dbid = RedisModule_GetDbIdFromDefragCtx(ctx);
RedisModule_Assert(dbid != -1); RedisModule_Assert(dbid != -1);
RedisModule_Log(NULL, "notice", "Defrag key: %s", RedisModule_StringPtrLen(key, NULL));
/* Attempt to get cursor, validate it's what we're exepcting */ /* Attempt to get cursor, validate it's what we're exepcting */
if (RedisModule_DefragCursorGet(ctx, &i) == REDISMODULE_OK) { if (RedisModule_DefragCursorGet(ctx, &i) == REDISMODULE_OK) {
if (i > 0) datatype_resumes++; if (i > 0) datatype_resumes++;

View File

@ -117,6 +117,67 @@ int test_open_key_subexpired_hget(RedisModuleCtx *ctx, RedisModuleString **argv,
return REDISMODULE_OK; return REDISMODULE_OK;
} }
int test_open_key_hget_expire(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
if (argc<3) {
RedisModule_WrongArity(ctx);
return REDISMODULE_OK;
}
RedisModuleKey *key = openKeyWithMode(ctx, argv[1], REDISMODULE_OPEN_KEY_ACCESS_EXPIRED);
if (!key) return REDISMODULE_OK;
mstime_t expireAt;
/* Let's test here that we get error if using invalid flags combination */
RedisModule_Assert(
RedisModule_HashGet(key,
REDISMODULE_HASH_EXISTS |
REDISMODULE_HASH_EXPIRE_TIME,
argv[2], &expireAt, NULL) == REDISMODULE_ERR);
/* Now let's get the expire time */
RedisModule_HashGet(key, REDISMODULE_HASH_EXPIRE_TIME,argv[2],&expireAt,NULL);
RedisModule_ReplyWithLongLong(ctx, expireAt);
RedisModule_CloseKey(key);
return REDISMODULE_OK;
}
/* Test variadic function to get two expiration times */
int test_open_key_hget_two_expire(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
if (argc<3) {
RedisModule_WrongArity(ctx);
return REDISMODULE_OK;
}
RedisModuleKey *key = openKeyWithMode(ctx, argv[1], REDISMODULE_OPEN_KEY_ACCESS_EXPIRED);
if (!key) return REDISMODULE_OK;
mstime_t expireAt1, expireAt2;
RedisModule_HashGet(key,REDISMODULE_HASH_EXPIRE_TIME,argv[2],&expireAt1,argv[3],&expireAt2,NULL);
/* return the two expire time */
RedisModule_ReplyWithArray(ctx, 2);
RedisModule_ReplyWithLongLong(ctx, expireAt1);
RedisModule_ReplyWithLongLong(ctx, expireAt2);
RedisModule_CloseKey(key);
return REDISMODULE_OK;
}
int test_open_key_hget_min_expire(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
if (argc!=2) {
RedisModule_WrongArity(ctx);
return REDISMODULE_OK;
}
RedisModuleKey *key = openKeyWithMode(ctx, argv[1], REDISMODULE_READ);
if (!key) return REDISMODULE_OK;
volatile mstime_t minExpire = RedisModule_HashFieldMinExpire(key);
RedisModule_ReplyWithLongLong(ctx, minExpire);
RedisModule_CloseKey(key);
return REDISMODULE_OK;
}
int numReplies; int numReplies;
void ScanCallback(RedisModuleKey *key, RedisModuleString *field, RedisModuleString *value, void *privdata) { void ScanCallback(RedisModuleKey *key, RedisModuleString *field, RedisModuleString *value, void *privdata) {
UNUSED(key); UNUSED(key);
@ -172,6 +233,12 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
return REDISMODULE_ERR; return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx, "hash.hscan_expired", test_open_key_access_expired_hscan,"", 0, 0, 0) == REDISMODULE_ERR) if (RedisModule_CreateCommand(ctx, "hash.hscan_expired", test_open_key_access_expired_hscan,"", 0, 0, 0) == REDISMODULE_ERR)
return REDISMODULE_ERR; return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx, "hash.hget_expire", test_open_key_hget_expire,"", 0, 0, 0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx, "hash.hget_two_expire", test_open_key_hget_two_expire,"", 0, 0, 0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx, "hash.hget_min_expire", test_open_key_hget_min_expire,"", 0, 0, 0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
return REDISMODULE_OK; return REDISMODULE_OK;
} }

View File

@ -1,11 +1,12 @@
#include "redismodule.h" #include "redismodule.h"
#include <strings.h> #include <strings.h>
int mutable_bool_val; int mutable_bool_val, no_prefix_bool, no_prefix_bool2;
int immutable_bool_val; int immutable_bool_val;
long long longval; long long longval, no_prefix_longval;
long long memval; long long memval, no_prefix_memval;
RedisModuleString *strval = NULL; RedisModuleString *strval = NULL;
int enumval; RedisModuleString *strval2 = NULL;
int enumval, no_prefix_enumval;
int flagsval; int flagsval;
/* Series of get and set callbacks for each type of config, these rely on the privdata ptr /* Series of get and set callbacks for each type of config, these rely on the privdata ptr
@ -103,6 +104,36 @@ int longlongApplyFunc(RedisModuleCtx *ctx, void *privdata, RedisModuleString **e
return REDISMODULE_OK; return REDISMODULE_OK;
} }
RedisModuleString *getStringConfigUnprefix(const char *name, void *privdata) {
REDISMODULE_NOT_USED(name);
REDISMODULE_NOT_USED(privdata);
return strval2;
}
int setStringConfigUnprefix(const char *name, RedisModuleString *new, void *privdata, RedisModuleString **err) {
REDISMODULE_NOT_USED(name);
REDISMODULE_NOT_USED(err);
REDISMODULE_NOT_USED(privdata);
if (strval2) RedisModule_FreeString(NULL, strval2);
RedisModule_RetainString(NULL, new);
strval2 = new;
return REDISMODULE_OK;
}
int getEnumConfigUnprefix(const char *name, void *privdata) {
REDISMODULE_NOT_USED(name);
REDISMODULE_NOT_USED(privdata);
return no_prefix_enumval;
}
int setEnumConfigUnprefix(const char *name, int val, void *privdata, RedisModuleString **err) {
REDISMODULE_NOT_USED(name);
REDISMODULE_NOT_USED(err);
REDISMODULE_NOT_USED(privdata);
no_prefix_enumval = val;
return REDISMODULE_OK;
}
int registerBlockCheck(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { int registerBlockCheck(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
REDISMODULE_NOT_USED(argv); REDISMODULE_NOT_USED(argv);
REDISMODULE_NOT_USED(argc); REDISMODULE_NOT_USED(argc);
@ -168,6 +199,30 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
if (RedisModule_RegisterNumericConfig(ctx, "numeric", -1, REDISMODULE_CONFIG_DEFAULT, -5, 2000, getNumericConfigCommand, setNumericConfigCommand, longlongApplyFunc, &longval) == REDISMODULE_ERR) { if (RedisModule_RegisterNumericConfig(ctx, "numeric", -1, REDISMODULE_CONFIG_DEFAULT, -5, 2000, getNumericConfigCommand, setNumericConfigCommand, longlongApplyFunc, &longval) == REDISMODULE_ERR) {
return REDISMODULE_ERR; return REDISMODULE_ERR;
} }
/*** unprefixed and aliased configuration ***/
if (RedisModule_RegisterBoolConfig(ctx, "unprefix-bool|unprefix-bool-alias", 1, REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED,
getBoolConfigCommand, setBoolConfigCommand, NULL, &no_prefix_bool) == REDISMODULE_ERR) {
return REDISMODULE_ERR;
}
if (RedisModule_RegisterBoolConfig(ctx, "unprefix-noalias-bool", 1, REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED,
getBoolConfigCommand, setBoolConfigCommand, NULL, &no_prefix_bool2) == REDISMODULE_ERR) {
return REDISMODULE_ERR;
}
if (RedisModule_RegisterNumericConfig(ctx, "unprefix.numeric|unprefix.numeric-alias", -1, REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED,
-5, 2000, getNumericConfigCommand, setNumericConfigCommand, NULL, &no_prefix_longval) == REDISMODULE_ERR) {
return REDISMODULE_ERR;
}
if (RedisModule_RegisterStringConfig(ctx, "unprefix-string|unprefix.string-alias", "secret unprefix", REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED,
getStringConfigUnprefix, setStringConfigUnprefix, NULL, NULL) == REDISMODULE_ERR) {
return REDISMODULE_ERR;
}
if (RedisModule_RegisterEnumConfig(ctx, "unprefix-enum|unprefix-enum-alias", 1, REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED,
enum_vals, int_vals, 5, getEnumConfigUnprefix, setEnumConfigUnprefix, NULL, NULL) == REDISMODULE_ERR) {
return REDISMODULE_ERR;
}
size_t len; size_t len;
if (argc && !strcasecmp(RedisModule_StringPtrLen(argv[0], &len), "noload")) { if (argc && !strcasecmp(RedisModule_StringPtrLen(argv[0], &len), "noload")) {
return REDISMODULE_OK; return REDISMODULE_OK;
@ -191,5 +246,9 @@ int RedisModule_OnUnload(RedisModuleCtx *ctx) {
RedisModule_FreeString(ctx, strval); RedisModule_FreeString(ctx, strval);
strval = NULL; strval = NULL;
} }
if (strval2) {
RedisModule_FreeString(ctx, strval2);
strval2 = NULL;
}
return REDISMODULE_OK; return REDISMODULE_OK;
} }

View File

@ -373,6 +373,8 @@ proc run_external_server_test {code overrides} {
r flushall r flushall
r function flush r function flush
r script flush
r config resetstat
# store configs # store configs
set saved_config {} set saved_config {}

View File

@ -1,3 +1,16 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
proc randstring {min max {type binary}} { proc randstring {min max {type binary}} {
set len [expr {$min+int(rand()*($max-$min+1))}] set len [expr {$min+int(rand()*($max-$min+1))}]
set output {} set output {}
@ -118,11 +131,11 @@ proc wait_for_sync r {
} }
} }
proc wait_replica_online r { proc wait_replica_online {r {replica_id 0} {maxtries 50} {delay 100}} {
wait_for_condition 50 100 { wait_for_condition $maxtries $delay {
[string match "*slave0:*,state=online*" [$r info replication]] [string match "*slave$replica_id:*,state=online*" [$r info replication]]
} else { } else {
fail "replica didn't online in time" fail "replica $replica_id did not become online in time"
} }
} }
@ -565,10 +578,11 @@ proc find_valgrind_errors {stderr on_termination} {
} }
# Execute a background process writing random data for the specified number # Execute a background process writing random data for the specified number
# of seconds to the specified Redis instance. # of seconds to the specified Redis instance. If key is omitted, a random key
proc start_write_load {host port seconds} { # is used for every SET command.
proc start_write_load {host port seconds {key ""}} {
set tclsh [info nameofexecutable] set tclsh [info nameofexecutable]
exec $tclsh tests/helpers/gen_write_load.tcl $host $port $seconds $::tls & exec $tclsh tests/helpers/gen_write_load.tcl $host $port $seconds $::tls $key &
} }
# Stop a process generating write load executed with start_write_load. # Stop a process generating write load executed with start_write_load.
@ -677,6 +691,12 @@ proc pause_process pid {
} }
proc resume_process pid { proc resume_process pid {
wait_for_condition 50 1000 {
[string match "T*" [exec ps -o state= -p $pid]]
} else {
puts [exec ps j $pid]
fail "process was not stopped"
}
exec kill -SIGCONT $pid exec kill -SIGCONT $pid
} }
@ -698,6 +718,16 @@ proc latencyrstat_percentiles {cmd r} {
} }
} }
proc get_io_thread_clients {id {client r}} {
set pattern "io_thread_$id:clients=(\[0-9\]+)"
set info [$client info threads]
if {[regexp $pattern $info _ value]} {
return $value
} else {
return -1
}
}
proc generate_fuzzy_traffic_on_key {key type duration} { proc generate_fuzzy_traffic_on_key {key type duration} {
# Commands per type, blocking commands removed # Commands per type, blocking commands removed
# TODO: extract these from COMMAND DOCS, and improve to include other types # TODO: extract these from COMMAND DOCS, and improve to include other types

View File

@ -116,6 +116,32 @@ start_server {tags {"acl external:skip"}} {
assert_match "*NOPERM*key*" $err assert_match "*NOPERM*key*" $err
} }
test {Validate read and write permissions format - empty permission} {
catch {r ACL SETUSER key-permission-RW %~} err
set err
} {ERR Error in ACL SETUSER modifier '%~': Syntax error}
test {Validate read and write permissions format - empty selector} {
catch {r ACL SETUSER key-permission-RW %} err
set err
} {ERR Error in ACL SETUSER modifier '%': Syntax error}
test {Validate read and write permissions format - empty pattern} {
# Empty pattern results with R/W access to no key
r ACL SETUSER key-permission-RW on nopass %RW~ +@all
$r2 auth key-permission-RW password
catch {$r2 SET x 5} err
set err
} {NOPERM No permissions to access a key}
test {Validate read and write permissions format - no pattern} {
# No pattern results with R/W access to no key (currently we accept this syntax error)
r ACL SETUSER key-permission-RW on nopass %RW +@all
$r2 auth key-permission-RW password
catch {$r2 SET x 5} err
set err
} {NOPERM No permissions to access a key}
test {Test separate read and write permissions on different selectors are not additive} { test {Test separate read and write permissions on different selectors are not additive} {
r ACL SETUSER key-permission-RW-selector on nopass "(%R~read* +@all)" "(%W~write* +@all)" r ACL SETUSER key-permission-RW-selector on nopass "(%R~read* +@all)" "(%W~write* +@all)"
$r2 auth key-permission-RW-selector password $r2 auth key-permission-RW-selector password

View File

@ -1,3 +1,16 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
start_server {tags {"auth external:skip"}} { start_server {tags {"auth external:skip"}} {
test {AUTH fails if there is no password configured server side} { test {AUTH fails if there is no password configured server side} {
catch {r auth foo} err catch {r auth foo} err
@ -65,24 +78,29 @@ start_server {tags {"auth_binary_password external:skip"}} {
set master_port [srv -1 port] set master_port [srv -1 port]
set slave [srv 0 client] set slave [srv 0 client]
test {MASTERAUTH test with binary password} { foreach rdbchannel {yes no} {
$master config set requirepass "abc\x00def" test "MASTERAUTH test with binary password rdbchannel=$rdbchannel" {
$slave slaveof no one
$master config set requirepass "abc\x00def"
$master config set repl-rdb-channel $rdbchannel
# Configure the replica with masterauth # Configure the replica with masterauth
set loglines [count_log_lines 0] set loglines [count_log_lines 0]
$slave config set masterauth "abc" $slave config set masterauth "abc"
$slave slaveof $master_host $master_port $slave config set repl-rdb-channel $rdbchannel
$slave slaveof $master_host $master_port
# Verify replica is not able to sync with master # Verify replica is not able to sync with master
wait_for_log_messages 0 {"*Unable to AUTH to MASTER*"} $loglines 1000 10 wait_for_log_messages 0 {"*Unable to AUTH to MASTER*"} $loglines 1000 10
assert_equal {down} [s 0 master_link_status] assert_equal {down} [s 0 master_link_status]
# Test replica with the correct masterauth # Test replica with the correct masterauth
$slave config set masterauth "abc\x00def" $slave config set masterauth "abc\x00def"
wait_for_condition 50 100 { wait_for_condition 50 100 {
[s 0 master_link_status] eq {up} [s 0 master_link_status] eq {up}
} else { } else {
fail "Can't turn the instance into a replica" fail "Can't turn the instance into a replica"
}
} }
} }
} }

View File

@ -108,7 +108,11 @@ start_server {} {
$rr write [join [list "*1\r\n\$$maxmemory_clients_actual\r\n" [string repeat v $maxmemory_clients_actual]] ""] $rr write [join [list "*1\r\n\$$maxmemory_clients_actual\r\n" [string repeat v $maxmemory_clients_actual]] ""]
$rr flush $rr flush
} e } e
assert {![client_exists $cname]} wait_for_condition 100 10 {
![client_exists $cname]
} else {
fail "Failed to evict client"
}
$rr close $rr close
# Restore settings # Restore settings
@ -360,6 +364,13 @@ start_server {} {
resume_process $server_pid resume_process $server_pid
r ping ;# make sure a full event loop cycle is processed before issuing CLIENT LIST r ping ;# make sure a full event loop cycle is processed before issuing CLIENT LIST
# wait for get commands to be processed
wait_for_condition 100 10 {
[expr {[regexp {calls=(\d+)} [cmdrstat get r] -> calls] ? $calls : 0}] >= 2
} else {
fail "get did not arrive"
}
# Validate obuf-clients were disconnected (because of obuf limit) # Validate obuf-clients were disconnected (because of obuf limit)
catch {client_field obuf-client1 name} e catch {client_field obuf-client1 name} e
assert_match {no client named obuf-client1 found*} $e assert_match {no client named obuf-client1 found*} $e
@ -367,7 +378,9 @@ start_server {} {
assert_match {no client named obuf-client2 found*} $e assert_match {no client named obuf-client2 found*} $e
# Validate qbuf-client is still connected and wasn't evicted # Validate qbuf-client is still connected and wasn't evicted
assert_equal [client_field qbuf-client name] {qbuf-client} if {[lindex [r config get io-threads] 1] == 1} {
assert_equal [client_field qbuf-client name] {qbuf-client}
}
$rr1 close $rr1 close
$rr2 close $rr2 close
@ -404,8 +417,11 @@ start_server {} {
# Decrease maxmemory_clients and expect client eviction # Decrease maxmemory_clients and expect client eviction
r config set maxmemory-clients [expr $maxmemory_clients / 2] r config set maxmemory-clients [expr $maxmemory_clients / 2]
set connected_clients [llength [lsearch -all [split [string trim [r client list]] "\r\n"] *name=client*]] wait_for_condition 200 10 {
assert {$connected_clients > 0 && $connected_clients < $client_count} [llength [regexp -all -inline {name=client} [r client list]]] < $client_count
} else {
fail "Failed to evict clients"
}
foreach rr $rrs {$rr close} foreach rr $rrs {$rr close}
} }
@ -463,8 +479,11 @@ start_server {} {
assert {$total_client_mem <= $maxmemory_clients} assert {$total_client_mem <= $maxmemory_clients}
# Make sure we have only half of our clients now # Make sure we have only half of our clients now
set connected_clients [llength [lsearch -all [split [string trim [r client list]] "\r\n"] *name=client*]] wait_for_condition 200 100 {
assert {$connected_clients == [expr $client_count / 2]} [llength [regexp -all -inline {name=client} [r client list]]] == $client_count / 2
} else {
fail "Failed to evict clients"
}
# Restore the reply buffer resize to default # Restore the reply buffer resize to default
r debug replybuffer resizing 1 r debug replybuffer resizing 1
@ -519,7 +538,8 @@ start_server {} {
foreach size [lreverse $sizes] { foreach size [lreverse $sizes] {
set control_mem [client_field control tot-mem] set control_mem [client_field control tot-mem]
set total_mem [expr $total_mem - $clients_per_size * $size] set total_mem [expr $total_mem - $clients_per_size * $size]
r config set maxmemory-clients [expr $total_mem + $control_mem] # allow some tolerance when using io threads
r config set maxmemory-clients [expr $total_mem + $control_mem + 1000]
set clients [split [string trim [r client list]] "\r\n"] set clients [split [string trim [r client list]] "\r\n"]
# Verify only relevant clients were evicted # Verify only relevant clients were evicted
for {set i 0} {$i < [llength $sizes]} {incr i} { for {set i 0} {$i < [llength $sizes]} {incr i} {

View File

@ -222,6 +222,46 @@ start_server {tags {"hll"}} {
assert_equal 3 [r pfcount destkey] assert_equal 3 [r pfcount destkey]
} }
test {PFMERGE results with simd} {
r del hllscalar{t} hllsimd{t} hll1{t} hll2{t} hll3{t}
for {set x 1} {$x < 2000} {incr x} {
r pfadd hll1{t} [expr rand()]
}
for {set x 1} {$x < 4000} {incr x} {
r pfadd hll2{t} [expr rand()]
}
for {set x 1} {$x < 8000} {incr x} {
r pfadd hll3{t} [expr rand()]
}
assert {[r pfcount hll1{t}] > 0}
assert {[r pfcount hll2{t}] > 0}
assert {[r pfcount hll3{t}] > 0}
r pfdebug simd off
set scalar [r pfcount hll1{t} hll2{t} hll3{t}]
r pfdebug simd on
set simd [r pfcount hll1{t} hll2{t} hll3{t}]
assert {$scalar > 0}
assert {$simd > 0}
assert_equal $scalar $simd
r pfdebug simd off
r pfmerge hllscalar{t} hll1{t} hll2{t} hll3{t}
r pfdebug simd on
r pfmerge hllsimd{t} hll1{t} hll2{t} hll3{t}
set scalar [r pfcount hllscalar{t}]
set simd [r pfcount hllsimd{t}]
assert {$scalar > 0}
assert {$simd > 0}
assert_equal $scalar $simd
set scalar [r get hllscalar{t}]
set simd [r get hllsimd{t}]
assert_equal $scalar $simd
} {} {needs:pfdebug}
test {PFCOUNT multiple-keys merge returns cardinality of union #1} { test {PFCOUNT multiple-keys merge returns cardinality of union #1} {
r del hll1{t} hll2{t} hll3{t} r del hll1{t} hll2{t} hll3{t}
for {set x 1} {$x < 10000} {incr x} { for {set x 1} {$x < 10000} {incr x} {

View File

@ -0,0 +1,454 @@
################################################################################
# Test the "info keysizes" command.
# The command returns a histogram of the sizes of keys in the database.
################################################################################
# Query and Strip result of "info keysizes" from header, spaces, and newlines.
proc get_stripped_info {server} {
set infoStripped [string map {
"# Keysizes" ""
" " "" "\n" "" "\r" ""
} [$server info keysizes] ]
return $infoStripped
}
# Verify output of "info keysizes" command is as expected.
#
# Arguments:
# cmd - A command that should be run before the verification.
# expOutput - This is a string that represents the expected output abbreviated.
# Instead of the output of "strings_len_exp_distrib" write "STR".
# Similarly for LIST, SET, ZSET and HASH. Spaces and newlines are
# ignored.
# waitCond - If set to 1, the function wait_for_condition 50x50msec for the
# expOutput to match the actual output.
#
# (replicaMode) - Global variable that indicates if the test is running in replica
# mode. If so, run the command on leader, verify the output. Then wait
# for the replica to catch up and verify the output on the replica
# as well. Otherwise, just run the command on the leader and verify
# the output.
proc run_cmd_verify_hist {cmd expOutput {waitCond 0} } {
uplevel 1 $cmd
global replicaMode
# ref the leader with `server` variable
if {$replicaMode eq 1} { set server [srv -1 client] } else { set server [srv 0 client] }
# Replace all placeholders with the actual values. Remove spaces & newlines.
set expStripped [string map {
"STR" "distrib_strings_sizes"
"LIST" "distrib_lists_items"
"SET" "distrib_sets_items"
"ZSET" "distrib_zsets_items"
"HASH" "distrib_hashes_items"
" " "" "\n" "" "\r" ""
} $expOutput]
if {$waitCond} {
wait_for_condition 50 50 {
$expStripped eq [get_stripped_info $server]
} else {
fail "Unexpected KEYSIZES. Expected: `$expStripped` \
but got: `[get_stripped_info $server]`. Failed after command: $cmd"
}
} else {
set infoStripped [get_stripped_info $server]
if {$expStripped ne $infoStripped} {
fail "Unexpected KEYSIZES. Expected: `$expStripped` \
but got: `$infoStripped`. Failed after command: $cmd"
}
}
# If we are testing `replicaMode` then need to wait for the replica to catch up
if {$replicaMode eq 1} {
wait_for_condition 50 50 {
$expStripped eq [get_stripped_info $server]
} else {
fail "Unexpected replica KEYSIZES. Expected: `$expStripped` \
but got: `[get_stripped_info $server]`. Failed after command: $cmd"
}
}
}
proc test_all_keysizes { {replMode 0} } {
# If in replica mode then update global var `replicaMode` so function
# `run_cmd_verify_hist` knows to run the command on the leader and then
# wait for the replica to catch up.
global replicaMode
set replicaMode $replMode
# ref the leader with `server` variable
if {$replicaMode eq 1} {
set server [srv -1 client]
set suffixRepl "(replica)"
} else {
set server [srv 0 client]
set suffixRepl ""
}
test "KEYSIZES - Test i'th bin counts keysizes between (2^i) and (2^(i+1)-1) as expected $suffixRepl" {
set base_string ""
run_cmd_verify_hist {$server FLUSHALL} {}
for {set i 1} {$i <= 10} {incr i} {
append base_string "x"
set log_value [expr {1 << int(log($i) / log(2))}]
#puts "Iteration $i: $base_string (Log base 2 pattern: $log_value)"
run_cmd_verify_hist {$server set mykey $base_string} "db0_STR:$log_value=1"
}
}
test "KEYSIZES - Histogram of values of Bytes, Kilo and Mega $suffixRepl" {
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server set x 0123456789ABCDEF} {db0_STR:16=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:32=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:64=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:128=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:256=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:512=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:1K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:2K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:4K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:8K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:16K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:32K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:64K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:128K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:256K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:512K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:1M=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:2M=1}
}
test "KEYSIZES - Test List $suffixRepl" {
# FLUSHALL
run_cmd_verify_hist {$server FLUSHALL} {}
# RPUSH
run_cmd_verify_hist {$server RPUSH l1 1 2 3 4 5} {db0_LIST:4=1}
run_cmd_verify_hist {$server RPUSH l1 6 7 8 9} {db0_LIST:8=1}
# Test also LPUSH, RPUSH, LPUSHX, RPUSHX
run_cmd_verify_hist {$server LPUSH l2 1} {db0_LIST:1=1,8=1}
run_cmd_verify_hist {$server LPUSH l2 2} {db0_LIST:2=1,8=1}
run_cmd_verify_hist {$server LPUSHX l2 3} {db0_LIST:2=1,8=1}
run_cmd_verify_hist {$server RPUSHX l2 4} {db0_LIST:4=1,8=1}
# RPOP
run_cmd_verify_hist {$server RPOP l1} {db0_LIST:4=1,8=1}
run_cmd_verify_hist {$server RPOP l1} {db0_LIST:4=2}
# DEL
run_cmd_verify_hist {$server DEL l1} {db0_LIST:4=1}
# LINSERT, LTRIM
run_cmd_verify_hist {$server RPUSH l3 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14} {db0_LIST:4=1,8=1}
run_cmd_verify_hist {$server LINSERT l3 AFTER 9 10} {db0_LIST:4=1,16=1}
run_cmd_verify_hist {$server LTRIM l3 0 8} {db0_LIST:4=1,8=1}
# DEL
run_cmd_verify_hist {$server DEL l3} {db0_LIST:4=1}
run_cmd_verify_hist {$server DEL l2} {}
# LMOVE, BLMOVE
run_cmd_verify_hist {$server RPUSH l4 1 2 3 4 5 6 7 8} {db0_LIST:8=1}
run_cmd_verify_hist {$server LMOVE l4 l5 LEFT LEFT} {db0_LIST:1=1,4=1}
run_cmd_verify_hist {$server LMOVE l4 l5 RIGHT RIGHT} {db0_LIST:2=1,4=1}
run_cmd_verify_hist {$server LMOVE l4 l5 LEFT RIGHT} {db0_LIST:2=1,4=1}
run_cmd_verify_hist {$server LMOVE l4 l5 RIGHT LEFT} {db0_LIST:4=2}
run_cmd_verify_hist {$server BLMOVE l4 l5 RIGHT LEFT 0} {db0_LIST:2=1,4=1}
# DEL
run_cmd_verify_hist {$server DEL l4} {db0_LIST:4=1}
run_cmd_verify_hist {$server DEL l5} {}
# LMPOP
run_cmd_verify_hist {$server RPUSH l6 1 2 3 4 5 6 7 8 9 10} {db0_LIST:8=1}
run_cmd_verify_hist {$server LMPOP 1 l6 LEFT COUNT 2} {db0_LIST:8=1}
run_cmd_verify_hist {$server LMPOP 1 l6 LEFT COUNT 1} {db0_LIST:4=1}
run_cmd_verify_hist {$server LMPOP 1 l6 LEFT COUNT 6} {db0_LIST:1=1}
# LPOP
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l7 1 2 3 4} {db0_LIST:4=1}
run_cmd_verify_hist {$server LPOP l7} {db0_LIST:2=1}
# LREM
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l8 1 x 3 x 5 x 7 x 9 10} {db0_LIST:8=1}
run_cmd_verify_hist {$server LREM l8 3 x} {db0_LIST:4=1}
# EXPIRE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l9 1 2 3 4} {db0_LIST:4=1}
run_cmd_verify_hist {$server PEXPIRE l9 50} {db0_LIST:4=1}
run_cmd_verify_hist {} {} 1
# SET overwrites
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l9 1 2 3 4} {db0_LIST:4=1}
run_cmd_verify_hist {$server SET l9 1234567} {db0_STR:4=1}
run_cmd_verify_hist {$server DEL l9} {}
} {} {cluster:skip}
test "KEYSIZES - Test SET $suffixRepl" {
run_cmd_verify_hist {$server FLUSHALL} {}
# SADD
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5} {db0_SET:4=1}
run_cmd_verify_hist {$server SADD s1 6 7 8} {db0_SET:8=1}
# Test also SADD, SREM, SMOVE, SPOP
run_cmd_verify_hist {$server SADD s2 1} {db0_SET:1=1,8=1}
run_cmd_verify_hist {$server SADD s2 2} {db0_SET:2=1,8=1}
run_cmd_verify_hist {$server SREM s2 3} {db0_SET:2=1,8=1}
run_cmd_verify_hist {$server SMOVE s2 s3 2} {db0_SET:1=2,8=1}
run_cmd_verify_hist {$server SPOP s3} {db0_SET:1=1,8=1}
run_cmd_verify_hist {$server SPOP s2} {db0_SET:8=1}
run_cmd_verify_hist {$server SPOP s1} {db0_SET:4=1}
run_cmd_verify_hist {$server del s1} {}
# SDIFFSTORE
run_cmd_verify_hist {$server flushall} {}
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5 6 7 8} {db0_SET:8=1}
run_cmd_verify_hist {$server SADD s2 6 7 8 9 A B C D} {db0_SET:8=2}
run_cmd_verify_hist {$server SDIFFSTORE s3 s1 s2} {db0_SET:4=1,8=2}
#SINTERSTORE
run_cmd_verify_hist {$server flushall} {}
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5 6 7 8} {db0_SET:8=1}
run_cmd_verify_hist {$server SADD s2 6 7 8 9 A B C D} {db0_SET:8=2}
run_cmd_verify_hist {$server SINTERSTORE s3 s1 s2} {db0_SET:2=1,8=2}
#SUNIONSTORE
run_cmd_verify_hist {$server flushall} {}
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5 6 7 8} {db0_SET:8=1}
run_cmd_verify_hist {$server SADD s2 6 7 8 9 A B C D} {db0_SET:8=2}
run_cmd_verify_hist {$server SUNIONSTORE s3 s1 s2} {db0_SET:8=3}
run_cmd_verify_hist {$server SADD s4 E F G H} {db0_SET:4=1,8=3}
run_cmd_verify_hist {$server SUNIONSTORE s5 s3 s4} {db0_SET:4=1,8=3,16=1}
# DEL
run_cmd_verify_hist {$server flushall} {}
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5 6 7 8} {db0_SET:8=1}
run_cmd_verify_hist {$server DEL s1} {}
# EXPIRE
run_cmd_verify_hist {$server flushall} {}
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5 6 7 8} {db0_SET:8=1}
run_cmd_verify_hist {$server PEXPIRE s1 50} {db0_SET:8=1}
run_cmd_verify_hist {} {} 1
# SET overwrites
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5 6 7 8} {db0_SET:8=1}
run_cmd_verify_hist {$server SET s1 1234567} {db0_STR:4=1}
run_cmd_verify_hist {$server DEL s1} {}
} {} {cluster:skip}
test "KEYSIZES - Test ZSET $suffixRepl" {
# ZADD, ZREM
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZADD z1 6 f 7 g 8 h 9 i} {db0_ZSET:8=1}
run_cmd_verify_hist {$server ZADD z2 1 a} {db0_ZSET:1=1,8=1}
run_cmd_verify_hist {$server ZREM z1 a} {db0_ZSET:1=1,8=1}
run_cmd_verify_hist {$server ZREM z1 b} {db0_ZSET:1=1,4=1}
# ZREMRANGEBYSCORE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZREMRANGEBYSCORE z1 -inf (2} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZREMRANGEBYSCORE z1 -inf (3} {db0_ZSET:2=1}
# ZREMRANGEBYRANK
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e 6 f} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZREMRANGEBYRANK z1 0 1} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZREMRANGEBYRANK z1 0 0} {db0_ZSET:2=1}
# ZREMRANGEBYLEX
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 0 a 0 b 0 c 0 d 0 e 0 f} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZREMRANGEBYLEX z1 - (d} {db0_ZSET:2=1}
# ZUNIONSTORE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZADD z2 6 f 7 g 8 h 9 i} {db0_ZSET:4=2}
run_cmd_verify_hist {$server ZUNIONSTORE z3 2 z1 z2} {db0_ZSET:4=2,8=1}
# ZINTERSTORE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZADD z2 3 c 4 d 5 e 6 f} {db0_ZSET:4=2}
run_cmd_verify_hist {$server ZINTERSTORE z3 2 z1 z2} {db0_ZSET:2=1,4=2}
# BZPOPMIN, BZPOPMAX
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server BZPOPMIN z1 0} {db0_ZSET:4=1}
run_cmd_verify_hist {$server BZPOPMAX z1 0} {db0_ZSET:2=1}
# ZDIFFSTORE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZADD z2 3 c 4 d 5 e 6 f} {db0_ZSET:4=2}
run_cmd_verify_hist {$server ZDIFFSTORE z3 2 z1 z2} {db0_ZSET:2=1,4=2}
# ZINTERSTORE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZADD z2 3 c 4 d 5 e 6 f} {db0_ZSET:4=2}
run_cmd_verify_hist {$server ZINTERSTORE z3 2 z1 z2} {db0_ZSET:2=1,4=2}
# DEL
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server DEL z1} {}
# EXPIRE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server PEXPIRE z1 50} {db0_ZSET:4=1}
run_cmd_verify_hist {} {} 1
# SET overwrites
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server SET z1 1234567} {db0_STR:4=1}
run_cmd_verify_hist {$server DEL z1} {}
} {} {cluster:skip}
test "KEYSIZES - Test STRING $suffixRepl" {
# SETRANGE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server SET s2 1234567890} {db0_STR:8=1}
run_cmd_verify_hist {$server SETRANGE s2 10 123456} {db0_STR:16=1}
# MSET, MSETNX
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server MSET s3 1 s4 2 s5 3} {db0_STR:1=3}
run_cmd_verify_hist {$server MSETNX s6 1 s7 2 s8 3} {db0_STR:1=6}
# DEL
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server SET s9 1234567890} {db0_STR:8=1}
run_cmd_verify_hist {$server DEL s9} {}
#EXPIRE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server SET s10 1234567890} {db0_STR:8=1}
run_cmd_verify_hist {$server PEXPIRE s10 50} {db0_STR:8=1}
run_cmd_verify_hist {} {} 1
# SET (+overwrite)
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server SET s1 1024} {db0_STR:4=1}
run_cmd_verify_hist {$server SET s1 842} {db0_STR:2=1}
run_cmd_verify_hist {$server SET s1 2} {db0_STR:1=1}
run_cmd_verify_hist {$server SET s1 1234567} {db0_STR:4=1}
} {} {cluster:skip}
foreach type {listpackex hashtable} {
# Test different implementations of hash tables and listpacks
if {$type eq "hashtable"} {
$server config set hash-max-listpack-entries 0
} else {
$server config set hash-max-listpack-entries 512
}
test "KEYSIZES - Test HASH ($type) $suffixRepl" {
# HSETNX
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server HSETNX h1 1 1} {db0_HASH:1=1}
run_cmd_verify_hist {$server HSETNX h1 2 2} {db0_HASH:2=1}
# HSET, HDEL
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server HSET h2 1 1} {db0_HASH:1=1}
run_cmd_verify_hist {$server HSET h2 2 2} {db0_HASH:2=1}
run_cmd_verify_hist {$server HDEL h2 1} {db0_HASH:1=1}
run_cmd_verify_hist {$server HDEL h2 2} {}
# HMSET
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server HMSET h1 1 1 2 2 3 3} {db0_HASH:2=1}
run_cmd_verify_hist {$server HMSET h1 1 1 2 2 3 3} {db0_HASH:2=1}
run_cmd_verify_hist {$server HMSET h1 1 1 2 2 3 3 4 4} {db0_HASH:4=1}
# HINCRBY
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server hincrby h1 f1 10} {db0_HASH:1=1}
run_cmd_verify_hist {$server hincrby h1 f1 10} {db0_HASH:1=1}
run_cmd_verify_hist {$server hincrby h1 f2 20} {db0_HASH:2=1}
# HINCRBYFLOAT
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server hincrbyfloat h1 f1 10.5} {db0_HASH:1=1}
run_cmd_verify_hist {$server hincrbyfloat h1 f1 10.5} {db0_HASH:1=1}
run_cmd_verify_hist {$server hincrbyfloat h1 f2 10.5} {db0_HASH:2=1}
# HEXPIRE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server HSET h1 f1 1} {db0_HASH:1=1}
run_cmd_verify_hist {$server HSET h1 f2 1} {db0_HASH:2=1}
run_cmd_verify_hist {$server HPEXPIREAT h1 1 FIELDS 1 f1} {db0_HASH:1=1}
run_cmd_verify_hist {$server HSET h1 f3 1} {db0_HASH:2=1}
run_cmd_verify_hist {$server HPEXPIRE h1 50 FIELDS 1 f2} {db0_HASH:2=1}
run_cmd_verify_hist {} {db0_HASH:1=1} 1
run_cmd_verify_hist {$server HPEXPIRE h1 50 FIELDS 1 f3} {db0_HASH:1=1}
run_cmd_verify_hist {} {} 1
}
}
test "KEYSIZES - Test STRING BITS $suffixRepl" {
# BITOPS
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server SET b1 "x123456789"} {db0_STR:8=1}
run_cmd_verify_hist {$server SET b2 "x12345678"} {db0_STR:8=2}
run_cmd_verify_hist {$server BITOP AND b3 b1 b2} {db0_STR:8=3}
run_cmd_verify_hist {$server BITOP OR b4 b1 b2} {db0_STR:8=4}
run_cmd_verify_hist {$server BITOP XOR b5 b1 b2} {db0_STR:8=5}
# SETBIT
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server setbit b1 71 1} {db0_STR:8=1}
run_cmd_verify_hist {$server setbit b1 72 1} {db0_STR:8=1}
run_cmd_verify_hist {$server setbit b2 72 1} {db0_STR:8=2}
run_cmd_verify_hist {$server setbit b2 640 0} {db0_STR:8=1,64=1}
# BITFIELD
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server bitfield b3 set u8 6 255} {db0_STR:2=1}
run_cmd_verify_hist {$server bitfield b3 set u8 65 255} {db0_STR:8=1}
run_cmd_verify_hist {$server bitfield b4 set u8 1000 255} {db0_STR:8=1,64=1}
} {} {cluster:skip}
test "KEYSIZES - Test RESTORE $suffixRepl" {
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l10 1 2 3 4} {db0_LIST:4=1}
set encoded [$server dump l10]
run_cmd_verify_hist {$server del l10} {}
run_cmd_verify_hist {$server restore l11 0 $encoded} {db0_LIST:4=1}
}
test "KEYSIZES - Test RENAME $suffixRepl" {
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l12 1 2 3 4} {db0_LIST:4=1}
run_cmd_verify_hist {$server RENAME l12 l13} {db0_LIST:4=1}
} {} {cluster:skip}
test "KEYSIZES - Test MOVE $suffixRepl" {
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l1 1 2 3 4} {db0_LIST:4=1}
run_cmd_verify_hist {$server RPUSH l2 1} {db0_LIST:1=1,4=1}
run_cmd_verify_hist {$server MOVE l1 1} {db0_LIST:1=1 db1_LIST:4=1}
} {} {cluster:skip}
test "KEYSIZES - Test SWAPDB $suffixRepl" {
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l1 1 2 3 4} {db0_LIST:4=1}
$server select 1
run_cmd_verify_hist {$server ZADD z1 1 A} {db0_LIST:4=1 db1_ZSET:1=1}
run_cmd_verify_hist {$server SWAPDB 0 1} {db0_ZSET:1=1 db1_LIST:4=1}
$server select 0
} {OK} {singledb:skip}
test "KEYSIZES - Test RDB $suffixRepl" {
run_cmd_verify_hist {$server FLUSHALL} {}
# Write list, set and zset to db0
run_cmd_verify_hist {$server RPUSH l1 1 2 3 4} {db0_LIST:4=1}
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5} {db0_LIST:4=1 db0_SET:4=1}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c} {db0_LIST:4=1 db0_SET:4=1 db0_ZSET:2=1}
run_cmd_verify_hist {$server SAVE} {db0_LIST:4=1 db0_SET:4=1 db0_ZSET:2=1}
if {$replicaMode eq 1} {
run_cmd_verify_hist {restart_server -1 true false} {db0_LIST:4=1 db0_SET:4=1 db0_ZSET:2=1}
} else {
run_cmd_verify_hist {restart_server 0 true false} {db0_LIST:4=1 db0_SET:4=1 db0_ZSET:2=1}
}
} {} {external:skip}
}
start_server {} {
# Test KEYSIZES on a single server
r select 0
test_all_keysizes 0
# Start another server to test replication of KEYSIZES
start_server {tags {needs:repl external:skip}} {
# Set the outer layer server as primary
set primary [srv -1 client]
set primary_host [srv -1 host]
set primary_port [srv -1 port]
# Set this inner layer server as replica
set replica [srv 0 client]
# Server should have role replica
$replica replicaof $primary_host $primary_port
wait_for_condition 50 100 { [s 0 role] eq {slave} } else { fail "Replication not started." }
# Test KEYSIZES on leader and replica
$primary select 0
test_all_keysizes 1
}
}

View File

@ -1,3 +1,16 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
proc cmdstat {cmd} { proc cmdstat {cmd} {
return [cmdrstat $cmd r] return [cmdrstat $cmd r]
} }
@ -313,7 +326,7 @@ start_server {tags {"info" "external:skip"}} {
assert_lessthan $cycle2 [expr $cycle1+10] ;# we expect 2 or 3 cycles here, but allow some tolerance assert_lessthan $cycle2 [expr $cycle1+10] ;# we expect 2 or 3 cycles here, but allow some tolerance
if {$::verbose} { puts "eventloop metrics el_sum1: $el_sum1, el_sum2: $el_sum2" } if {$::verbose} { puts "eventloop metrics el_sum1: $el_sum1, el_sum2: $el_sum2" }
assert_morethan $el_sum2 $el_sum1 assert_morethan $el_sum2 $el_sum1
assert_lessthan $el_sum2 [expr $el_sum1+30000] ;# we expect roughly 100ms here, but allow some tolerance assert_lessthan $el_sum2 [expr $el_sum1+100000] ;# we expect roughly 100ms here, but allow some tolerance
if {$::verbose} { puts "eventloop metrics cmd_sum1: $cmd_sum1, cmd_sum2: $cmd_sum2" } if {$::verbose} { puts "eventloop metrics cmd_sum1: $cmd_sum1, cmd_sum2: $cmd_sum2" }
assert_morethan $cmd_sum2 $cmd_sum1 assert_morethan $cmd_sum2 $cmd_sum1
assert_lessthan $cmd_sum2 [expr $cmd_sum1+15000] ;# we expect about tens of ms here, but allow some tolerance assert_lessthan $cmd_sum2 [expr $cmd_sum1+15000] ;# we expect about tens of ms here, but allow some tolerance
@ -386,10 +399,10 @@ start_server {tags {"info" "external:skip"}} {
r config set client-output-buffer-limit "normal 10 0 0" r config set client-output-buffer-limit "normal 10 0 0"
r set key [string repeat a 100000] ;# to trigger output buffer limit check this needs to be big r set key [string repeat a 100000] ;# to trigger output buffer limit check this needs to be big
catch {r get key} catch {r get key}
r config set client-output-buffer-limit $org_outbuf_limit
set info [r info stats] set info [r info stats]
assert_equal [getInfoProperty $info client_output_buffer_limit_disconnections] {1} assert_equal [getInfoProperty $info client_output_buffer_limit_disconnections] {1}
r config set client-output-buffer-limit $org_outbuf_limit } {} {logreqres:skip} ;# same as obuf-limits.tcl, skip logreqres
} {OK} {logreqres:skip} ;# same as obuf-limits.tcl, skip logreqres
test {clients: pubsub clients} { test {clients: pubsub clients} {
set info [r info clients] set info [r info clients]

View File

@ -6,8 +6,13 @@ start_server {tags {"introspection"}} {
} }
test {CLIENT LIST} { test {CLIENT LIST} {
r client list set client_list [r client list]
} {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|list user=* redir=-1 resp=*} if {[lindex [r config get io-threads] 1] == 1} {
assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|list user=* redir=-1 resp=*} $client_list
} else {
assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=0 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|list user=* redir=-1 resp=*} $client_list
}
}
test {CLIENT LIST with IDs} { test {CLIENT LIST with IDs} {
set myid [r client id] set myid [r client id]
@ -16,8 +21,13 @@ start_server {tags {"introspection"}} {
} }
test {CLIENT INFO} { test {CLIENT INFO} {
r client info set client [r client info]
} {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|info user=* redir=-1 resp=*} if {[lindex [r config get io-threads] 1] == 1} {
assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|info user=* redir=-1 resp=*} $client
} else {
assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=0 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|info user=* redir=-1 resp=*} $client
}
}
test {CLIENT KILL with illegal arguments} { test {CLIENT KILL with illegal arguments} {
assert_error "ERR wrong number of arguments for 'client|kill' command" {r client kill} assert_error "ERR wrong number of arguments for 'client|kill' command" {r client kill}
@ -86,6 +96,11 @@ start_server {tags {"introspection"}} {
assert {$connected_clients >= 3} assert {$connected_clients >= 3}
set res [r client kill skipme yes] set res [r client kill skipme yes]
assert {$res == $connected_clients - 1} assert {$res == $connected_clients - 1}
wait_for_condition 1000 10 {
[s connected_clients] eq 1
} else {
fail "Can't kill all clients except the current one"
}
# Kill all clients, including `me` # Kill all clients, including `me`
set rd3 [redis_deferring_client] set rd3 [redis_deferring_client]
@ -304,6 +319,9 @@ start_server {tags {"introspection"}} {
$rd read ; # Discard the OK $rd read ; # Discard the OK
$bc blpop mylist 0 $bc blpop mylist 0
# make sure the blpop arrives first
$bc flush
after 100
wait_for_blocked_clients_count 1 wait_for_blocked_clients_count 1
r lpush mylist 1 r lpush mylist 1
wait_for_blocked_clients_count 0 wait_for_blocked_clients_count 0
@ -904,3 +922,62 @@ test {CONFIG REWRITE handles alias config properly} {
assert_equal [r config get hash-max-listpack-entries] {hash-max-listpack-entries 100} assert_equal [r config get hash-max-listpack-entries] {hash-max-listpack-entries 100}
} }
} {} {external:skip} } {} {external:skip}
test {IO threads client number} {
start_server {overrides {io-threads 2} tags {external:skip}} {
set iothread_clients [get_io_thread_clients 1]
assert_equal $iothread_clients [s connected_clients]
assert_equal [get_io_thread_clients 0] 0
r script debug yes ; # Transfer to main thread
assert_equal [get_io_thread_clients 0] 1
assert_equal [get_io_thread_clients 1] [expr $iothread_clients - 1]
set iothread_clients [get_io_thread_clients 1]
set rd1 [redis_deferring_client]
set rd2 [redis_deferring_client]
assert_equal [get_io_thread_clients 1] [expr $iothread_clients + 2]
$rd1 close
$rd2 close
wait_for_condition 1000 10 {
[get_io_thread_clients 1] eq $iothread_clients
} else {
fail "Fail to close clients of io thread 1"
}
assert_equal [get_io_thread_clients 0] 1
r script debug no ; # Transfer to io thread
assert_equal [get_io_thread_clients 0] 0
assert_equal [get_io_thread_clients 1] [expr $iothread_clients + 1]
}
}
test {Clients are evenly distributed among io threads} {
start_server {overrides {io-threads 4} tags {external:skip}} {
set cur_clients [s connected_clients]
assert_equal $cur_clients 1
global rdclients
for {set i 1} {$i < 9} {incr i} {
set rdclients($i) [redis_deferring_client]
}
for {set i 1} {$i <= 3} {incr i} {
assert_equal [get_io_thread_clients $i] 3
}
$rdclients(3) close
$rdclients(4) close
wait_for_condition 1000 10 {
[get_io_thread_clients 1] eq 2 &&
[get_io_thread_clients 2] eq 2 &&
[get_io_thread_clients 3] eq 3
} else {
fail "Fail to close clients"
}
set $rdclients(3) [redis_deferring_client]
set $rdclients(4) [redis_deferring_client]
for {set i 1} {$i <= 3} {incr i} {
assert_equal [get_io_thread_clients $i] 3
}
}
}

View File

@ -29,7 +29,11 @@ start_server {tags {"maxmemory" "external:skip"}} {
set dbsize [r dbsize] set dbsize [r dbsize]
if $client_eviction { if $client_eviction {
return [expr $evicted_clients > 0 && $evicted_keys == 0 && $dbsize == 50] if {[lindex [r config get io-threads] 1] == 1} {
return [expr $evicted_clients > 0 && $evicted_keys == 0 && $dbsize == 50]
} else {
return [expr $evicted_clients >= 0 && $evicted_keys >= 0 && $dbsize <= 50]
}
} else { } else {
return [expr $evicted_clients == 0 && $evicted_keys > 0 && $dbsize < 50] return [expr $evicted_clients == 0 && $evicted_keys > 0 && $dbsize < 50]
} }

Some files were not shown because too many files have changed in this diff Show More