Merge remote-tracking branch 'upstream/unstable' into HEAD

This commit is contained in:
YaacovHazan 2025-01-14 14:01:19 +02:00
commit 9c81f8bd61
115 changed files with 8217 additions and 1635 deletions

View File

@ -76,7 +76,6 @@ jobs:
if: |
(github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) &&
!contains(github.event.inputs.skipjobs, 'fortify')
container: ubuntu:lunar
timeout-minutes: 14400
steps:
- name: prep
@ -94,12 +93,10 @@ jobs:
ref: ${{ env.GITHUB_HEAD_REF }}
- name: make
run: |
apt-get update && apt-get install -y make gcc-13 g++-13
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-13 100
update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-13 100
apt-get update && apt-get install -y make gcc g++
make CC=gcc REDIS_CFLAGS='-Werror -DREDIS_TEST -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3'
- name: testprep
run: apt-get install -y tcl8.6 tclx procps
run: sudo apt-get install -y tcl8.6 tclx procps
- name: test
if: true && !contains(github.event.inputs.skiptests, 'redis')
run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}}
@ -876,7 +873,7 @@ jobs:
build-macos:
strategy:
matrix:
os: [macos-12, macos-14]
os: [macos-13, macos-15]
runs-on: ${{ matrix.os }}
if: |
(github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) &&
@ -903,7 +900,7 @@ jobs:
run: make REDIS_CFLAGS='-Werror -DREDIS_TEST'
test-freebsd:
runs-on: macos-12
runs-on: macos-13
if: |
(github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) &&
!contains(github.event.inputs.skipjobs, 'freebsd')

View File

@ -15,7 +15,8 @@ Another good example is to think of Redis as a more complex version of memcached
If you want to know more, this is a list of selected starting points:
* Introduction to Redis data types. https://redis.io/topics/data-types-intro
* Introduction to Redis data types. https://redis.io/docs/latest/develop/data-types/
* The full list of Redis commands. https://redis.io/commands
* There is much more inside the official Redis documentation. https://redis.io/documentation
@ -493,7 +494,7 @@ Other C files
* `dict.c` is an implementation of a non-blocking hash table which rehashes incrementally.
* `cluster.c` implements the Redis Cluster. Probably a good read only after being very familiar with the rest of the Redis code base. If you want to read `cluster.c` make sure to read the [Redis Cluster specification][4].
[4]: https://redis.io/topics/cluster-spec
[4]: https://redis.io/docs/latest/operate/oss_and_stack/reference/cluster-spec/
Anatomy of a Redis command
---

View File

@ -478,7 +478,7 @@ static int __redisGetSubscribeCallback(redisAsyncContext *ac, redisReply *reply,
/* Match reply with the expected format of a pushed message.
* The type and number of elements (3 to 4) are specified at:
* https://redis.io/topics/pubsub#format-of-pushed-messages */
* https://redis.io/docs/latest/develop/interact/pubsub/#format-of-pushed-messages */
if ((reply->type == REDIS_REPLY_ARRAY && !(c->flags & REDIS_SUPPORTS_PUSH) && reply->elements >= 3) ||
reply->type == REDIS_REPLY_PUSH) {
assert(reply->element[0]->type == REDIS_REPLY_STRING);

View File

@ -727,6 +727,24 @@ repl-disable-tcp-nodelay no
#
# repl-backlog-ttl 3600
# During a fullsync, the master may decide to send both the RDB file and the
# replication stream to the replica in parallel. This approach shifts the
# responsibility of buffering the replication stream to the replica during the
# fullsync process. The replica accumulates the replication stream data until
# the RDB file is fully loaded. Once the RDB delivery is completed and
# successfully loaded, the replica begins processing and applying the
# accumulated replication data to the db. The configuration below controls how
# much replication data the replica can accumulate during a fullsync.
#
# When the replica reaches this limit, it will stop accumulating further data.
# At this point, additional data accumulation may occur on the master side
# depending on the 'client-output-buffer-limit <replica>' config of master.
#
# A value of 0 means replica inherits hard limit of
# 'client-output-buffer-limit <replica>' config to limit accumulation size.
#
# replica-full-sync-buffer-limit 0
# The replica priority is an integer number published by Redis in the INFO
# output. It is used by Redis Sentinel in order to select a replica to promote
# into a master if the master is no longer working correctly.
@ -838,7 +856,7 @@ replica-priority 100
# this is used in order to send invalidation messages to clients. Please
# check this page to understand more about the feature:
#
# https://redis.io/topics/client-side-caching
# https://redis.io/docs/latest/develop/use/client-side-caching/
#
# When tracking is enabled for a client, all the read only queries are assumed
# to be cached: this will force Redis to store information in the invalidation
@ -1016,7 +1034,7 @@ replica-priority 100
# * stream - Data type: streams related.
#
# For more information about ACL configuration please refer to
# the Redis web site at https://redis.io/topics/acl
# the Redis web site at https://redis.io/docs/latest/operate/oss_and_stack/management/security/acl/
# ACL LOG
#
@ -1291,38 +1309,27 @@ lazyfree-lazy-user-flush no
# in different I/O threads. Since especially writing is so slow, normally
# Redis users use pipelining in order to speed up the Redis performances per
# core, and spawn multiple instances in order to scale more. Using I/O
# threads it is possible to easily speedup two times Redis without resorting
# threads it is possible to easily speedup several times Redis without resorting
# to pipelining nor sharding of the instance.
#
# By default threading is disabled, we suggest enabling it only in machines
# that have at least 4 or more cores, leaving at least one spare core.
# Using more than 8 threads is unlikely to help much. We also recommend using
# threaded I/O only if you actually have performance problems, with Redis
# instances being able to use a quite big percentage of CPU time, otherwise
# there is no point in using this feature.
# We also recommend using threaded I/O only if you actually have performance
# problems, with Redis instances being able to use a quite big percentage of
# CPU time, otherwise there is no point in using this feature.
#
# So for instance if you have a four cores boxes, try to use 2 or 3 I/O
# threads, if you have a 8 cores, try to use 6 threads. In order to
# So for instance if you have a four cores boxes, try to use 3 I/O
# threads, if you have a 8 cores, try to use 7 threads. In order to
# enable I/O threads use the following configuration directive:
#
# io-threads 4
#
# Setting io-threads to 1 will just use the main thread as usual.
# When I/O threads are enabled, we only use threads for writes, that is
# to thread the write(2) syscall and transfer the client buffers to the
# socket. However it is also possible to enable threading of reads and
# protocol parsing using the following configuration directive, by setting
# it to yes:
# When I/O threads are enabled, we not only use threads for writes, that
# is to thread the write(2) syscall and transfer the client buffers to the
# socket, but also use threads for reads and protocol parsing.
#
# io-threads-do-reads no
#
# Usually threading reads doesn't help much.
#
# NOTE 1: This configuration directive cannot be changed at runtime via
# CONFIG SET. Also, this feature currently does not work when SSL is
# enabled.
#
# NOTE 2: If you want to test the Redis speedup using redis-benchmark, make
# NOTE: If you want to test the Redis speedup using redis-benchmark, make
# sure you also run the benchmark itself in threaded mode, using the
# --threads option to match the number of Redis threads, otherwise you'll not
# be able to notice the improvements.
@ -1362,7 +1369,7 @@ oom-score-adj-values 0 200 800
#################### KERNEL transparent hugepage CONTROL ######################
# Usually the kernel Transparent Huge Pages control is set to "madvise" or
# or "never" by default (/sys/kernel/mm/transparent_hugepage/enabled), in which
# "never" by default (/sys/kernel/mm/transparent_hugepage/enabled), in which
# case this config has no effect. On systems in which it is set to "always",
# redis will attempt to disable it specifically for the redis process in order
# to avoid latency problems specifically with fork(2) and CoW.
@ -1393,7 +1400,7 @@ disable-thp yes
# restarting the server can lead to data loss. A conversion needs to be done
# by setting it via CONFIG command on a live server first.
#
# Please check https://redis.io/topics/persistence for more information.
# Please check https://redis.io/docs/latest/operate/oss_and_stack/management/persistence/ for more information.
appendonly no
@ -1880,7 +1887,7 @@ latency-monitor-threshold 0
############################# EVENT NOTIFICATION ##############################
# Redis can notify Pub/Sub clients about events happening in the key space.
# This feature is documented at https://redis.io/topics/notifications
# This feature is documented at https://redis.io/docs/latest/develop/use/keyspace-notifications/
#
# For instance if keyspace events notification is enabled, and a client
# performs a DEL operation on key "foo" stored in the Database 0, two

View File

@ -133,7 +133,7 @@ sentinel monitor mymaster 127.0.0.1 6379 2
sentinel down-after-milliseconds mymaster 30000
# IMPORTANT NOTE: starting with Redis 6.2 ACL capability is supported for
# Sentinel mode, please refer to the Redis website https://redis.io/topics/acl
# Sentinel mode, please refer to the Redis website https://redis.io/docs/latest/operate/oss_and_stack/management/security/acl/
# for more details.
# Sentinel's ACL users are defined in the following format:
@ -145,7 +145,7 @@ sentinel down-after-milliseconds mymaster 30000
# user worker +@admin +@connection ~* on >ffa9203c493aa99
#
# For more information about ACL configuration please refer to the Redis
# website at https://redis.io/topics/acl and redis server configuration
# website at https://redis.io/docs/latest/operate/oss_and_stack/management/security/acl/ and redis server configuration
# template redis.conf.
# ACL LOG
@ -174,7 +174,7 @@ acllog-max-len 128
# so Sentinel will try to authenticate with the same password to all the
# other Sentinels. So you need to configure all your Sentinels in a given
# group with the same "requirepass" password. Check the following documentation
# for more info: https://redis.io/topics/sentinel
# for more info: https://redis.io/docs/latest/operate/oss_and_stack/management/sentinel/
#
# IMPORTANT NOTE: starting with Redis 6.2 "requirepass" is a compatibility
# layer on top of the ACL system. The option effect will be just setting

View File

@ -354,11 +354,11 @@ endif
REDIS_SERVER_NAME=redis-server$(PROG_SUFFIX)
REDIS_SENTINEL_NAME=redis-sentinel$(PROG_SUFFIX)
REDIS_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o mstr.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o
REDIS_SERVER_OBJ=threads_mngr.o adlist.o quicklist.o ae.o anet.o dict.o ebuckets.o eventnotifier.o iothread.o mstr.o kvstore.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o cluster_legacy.o crc16.o endianconv.o slowlog.o eval.o bio.o rio.o rand.o memtest.o syscheck.o crcspeed.o crccombine.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o redis-check-aof.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o rax.o t_stream.o listpack.o localtime.o lolwut.o lolwut5.o lolwut6.o acl.o tracking.o socket.o tls.o sha256.o timeout.o setcpuaffinity.o monotonic.o mt19937-64.o resp_parser.o call_reply.o script_lua.o script.o functions.o function_lua.o commands.o strl.o connection.o unix.o logreqres.o
REDIS_CLI_NAME=redis-cli$(PROG_SUFFIX)
REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o redisassert.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o
REDIS_CLI_OBJ=anet.o adlist.o dict.o redis-cli.o zmalloc.o release.o ae.o redisassert.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o cli_commands.o
REDIS_BENCHMARK_NAME=redis-benchmark$(PROG_SUFFIX)
REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o redisassert.o release.o crcspeed.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o
REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o redisassert.o release.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o
REDIS_CHECK_RDB_NAME=redis-check-rdb$(PROG_SUFFIX)
REDIS_CHECK_AOF_NAME=redis-check-aof$(PROG_SUFFIX)
ALL_SOURCES=$(sort $(patsubst %.o,%.c,$(REDIS_SERVER_OBJ) $(REDIS_CLI_OBJ) $(REDIS_BENCHMARK_OBJ)))

View File

@ -277,7 +277,7 @@ int ACLListMatchSds(void *a, void *b) {
/* Method to free list elements from ACL users password/patterns lists. */
void ACLListFreeSds(void *item) {
sdsfree(item);
sdsfreegeneric(item);
}
/* Method to duplicate list elements from ACL users password/patterns lists. */
@ -469,6 +469,11 @@ void ACLFreeUser(user *u) {
zfree(u);
}
/* Generic version of ACLFreeUser. */
void ACLFreeUserGeneric(void *u) {
ACLFreeUser((user *)u);
}
/* When a user is deleted we need to cycle the active
* connections in order to kill all the pending ones that
* are authenticated with such user. */
@ -1061,19 +1066,24 @@ int ACLSetSelector(aclSelector *selector, const char* op, size_t oplen) {
int flags = 0;
size_t offset = 1;
if (op[0] == '%') {
int perm_ok = 1;
for (; offset < oplen; offset++) {
if (toupper(op[offset]) == 'R' && !(flags & ACL_READ_PERMISSION)) {
flags |= ACL_READ_PERMISSION;
} else if (toupper(op[offset]) == 'W' && !(flags & ACL_WRITE_PERMISSION)) {
flags |= ACL_WRITE_PERMISSION;
} else if (op[offset] == '~' && flags) {
} else if (op[offset] == '~') {
offset++;
break;
} else {
perm_ok = 0;
break;
}
}
if (!flags || !perm_ok) {
errno = EINVAL;
return C_ERR;
}
}
} else {
flags = ACL_ALL_PERMISSION;
}
@ -1577,14 +1587,22 @@ static int ACLSelectorCheckKey(aclSelector *selector, const char *key, int keyle
if (keyspec_flags & CMD_KEY_DELETE) key_flags |= ACL_WRITE_PERMISSION;
if (keyspec_flags & CMD_KEY_UPDATE) key_flags |= ACL_WRITE_PERMISSION;
/* Is given key represent a prefix of a set of keys */
int prefix = keyspec_flags & CMD_KEY_PREFIX;
/* Test this key against every pattern. */
while((ln = listNext(&li))) {
keyPattern *pattern = listNodeValue(ln);
if ((pattern->flags & key_flags) != key_flags)
continue;
size_t plen = sdslen(pattern->pattern);
if (stringmatchlen(pattern->pattern,plen,key,keylen,0))
if (prefix) {
if (prefixmatch(pattern->pattern,plen,key,keylen,0))
return ACL_OK;
} else {
if (stringmatchlen(pattern->pattern, plen, key, keylen, 0))
return ACL_OK;
}
}
return ACL_DENIED_KEY;
}
@ -2446,12 +2464,12 @@ sds ACLLoadFromFile(const char *filename) {
}
if (user_channels)
raxFreeWithCallback(user_channels, (void(*)(void*))listRelease);
raxFreeWithCallback(old_users,(void(*)(void*))ACLFreeUser);
raxFreeWithCallback(user_channels, listReleaseGeneric);
raxFreeWithCallback(old_users, ACLFreeUserGeneric);
sdsfree(errors);
return NULL;
} else {
raxFreeWithCallback(Users,(void(*)(void*))ACLFreeUser);
raxFreeWithCallback(Users, ACLFreeUserGeneric);
Users = old_users;
errors = sdscat(errors,"WARNING: ACL errors detected, no change to the previously active ACL rules was performed");
return errors;

View File

@ -61,6 +61,11 @@ void listRelease(list *list)
zfree(list);
}
/* Generic version of listRelease. */
void listReleaseGeneric(void *list) {
listRelease((struct list*)list);
}
/* Add a new node to the list, to head, containing the specified 'value'
* pointer as value.
*

View File

@ -51,6 +51,7 @@ typedef struct list {
/* Prototypes */
list *listCreate(void);
void listRelease(list *list);
void listReleaseGeneric(void *list);
void listEmpty(list *list);
list *listAddNodeHead(list *list, void *value);
list *listAddNodeTail(list *list, void *value);

View File

@ -42,7 +42,7 @@
#endif
#endif
#define INITIAL_EVENT 1024
aeEventLoop *aeCreateEventLoop(int setsize) {
aeEventLoop *eventLoop;
int i;
@ -50,8 +50,9 @@ aeEventLoop *aeCreateEventLoop(int setsize) {
monotonicInit(); /* just in case the calling app didn't initialize */
if ((eventLoop = zmalloc(sizeof(*eventLoop))) == NULL) goto err;
eventLoop->events = zmalloc(sizeof(aeFileEvent)*setsize);
eventLoop->fired = zmalloc(sizeof(aeFiredEvent)*setsize);
eventLoop->nevents = setsize < INITIAL_EVENT ? setsize : INITIAL_EVENT;
eventLoop->events = zmalloc(sizeof(aeFileEvent)*eventLoop->nevents);
eventLoop->fired = zmalloc(sizeof(aeFiredEvent)*eventLoop->nevents);
if (eventLoop->events == NULL || eventLoop->fired == NULL) goto err;
eventLoop->setsize = setsize;
eventLoop->timeEventHead = NULL;
@ -61,10 +62,11 @@ aeEventLoop *aeCreateEventLoop(int setsize) {
eventLoop->beforesleep = NULL;
eventLoop->aftersleep = NULL;
eventLoop->flags = 0;
memset(eventLoop->privdata, 0, sizeof(eventLoop->privdata));
if (aeApiCreate(eventLoop) == -1) goto err;
/* Events with mask == AE_NONE are not set. So let's initialize the
* vector with it. */
for (i = 0; i < setsize; i++)
for (i = 0; i < eventLoop->nevents; i++)
eventLoop->events[i].mask = AE_NONE;
return eventLoop;
@ -102,20 +104,19 @@ void aeSetDontWait(aeEventLoop *eventLoop, int noWait) {
*
* Otherwise AE_OK is returned and the operation is successful. */
int aeResizeSetSize(aeEventLoop *eventLoop, int setsize) {
int i;
if (setsize == eventLoop->setsize) return AE_OK;
if (eventLoop->maxfd >= setsize) return AE_ERR;
if (aeApiResize(eventLoop,setsize) == -1) return AE_ERR;
eventLoop->events = zrealloc(eventLoop->events,sizeof(aeFileEvent)*setsize);
eventLoop->fired = zrealloc(eventLoop->fired,sizeof(aeFiredEvent)*setsize);
eventLoop->setsize = setsize;
/* Make sure that if we created new slots, they are initialized with
* an AE_NONE mask. */
for (i = eventLoop->maxfd+1; i < setsize; i++)
eventLoop->events[i].mask = AE_NONE;
/* If the current allocated space is larger than the requested size,
* we need to shrink it to the requested size. */
if (setsize < eventLoop->nevents) {
eventLoop->events = zrealloc(eventLoop->events,sizeof(aeFileEvent)*setsize);
eventLoop->fired = zrealloc(eventLoop->fired,sizeof(aeFiredEvent)*setsize);
eventLoop->nevents = setsize;
}
return AE_OK;
}
@ -147,6 +148,22 @@ int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
errno = ERANGE;
return AE_ERR;
}
/* Resize the events and fired arrays if the file
* descriptor exceeds the current number of events. */
if (unlikely(fd >= eventLoop->nevents)) {
int newnevents = eventLoop->nevents;
newnevents = (newnevents * 2 > fd + 1) ? newnevents * 2 : fd + 1;
newnevents = (newnevents > eventLoop->setsize) ? eventLoop->setsize : newnevents;
eventLoop->events = zrealloc(eventLoop->events, sizeof(aeFileEvent) * newnevents);
eventLoop->fired = zrealloc(eventLoop->fired, sizeof(aeFiredEvent) * newnevents);
/* Initialize new slots with an AE_NONE mask */
for (int i = eventLoop->nevents; i < newnevents; i++)
eventLoop->events[i].mask = AE_NONE;
eventLoop->nevents = newnevents;
}
aeFileEvent *fe = &eventLoop->events[fd];
if (aeApiAddEvent(eventLoop, fd, mask) == -1)

View File

@ -79,6 +79,7 @@ typedef struct aeEventLoop {
int maxfd; /* highest file descriptor currently registered */
int setsize; /* max number of file descriptors tracked */
long long timeEventNextId;
int nevents; /* Size of Registered events */
aeFileEvent *events; /* Registered events */
aeFiredEvent *fired; /* Fired events */
aeTimeEvent *timeEventHead;
@ -87,6 +88,7 @@ typedef struct aeEventLoop {
aeBeforeSleepProc *beforesleep;
aeBeforeSleepProc *aftersleep;
int flags;
void *privdata[2];
} aeEventLoop;
/* Prototypes */

View File

@ -32,7 +32,7 @@
* (if the flag was 0 -> set to 1, if it's already 1 -> do nothing, but the final result is that the flag is set),
* and also it has a full barrier (__sync_lock_test_and_set has acquire barrier).
*
* NOTE2: Unlike other atomic type, which aren't guaranteed to be lock free, c11 atmoic_flag does.
* NOTE2: Unlike other atomic type, which aren't guaranteed to be lock free, c11 atomic_flag does.
* To check whether a type is lock free, atomic_is_lock_free() can be used.
* It can be considered to limit the flag type to atomic_flag to improve performance.
*

View File

@ -489,22 +489,27 @@ int getBitfieldTypeFromArgument(client *c, robj *o, int *sign, int *bits) {
* bits to a string object. The command creates or pad with zeroes the string
* so that the 'maxbit' bit can be addressed. The object is finally
* returned. Otherwise if the key holds a wrong type NULL is returned and
* an error is sent to the client. */
robj *lookupStringForBitCommand(client *c, uint64_t maxbit, int *dirty) {
* an error is sent to the client.
*
* (Must provide all the arguments to the function)
*/
static robj *lookupStringForBitCommand(client *c, uint64_t maxbit,
size_t *strOldSize, size_t *strGrowSize)
{
size_t byte = maxbit >> 3;
robj *o = lookupKeyWrite(c->db,c->argv[1]);
if (checkType(c,o,OBJ_STRING)) return NULL;
if (dirty) *dirty = 0;
if (o == NULL) {
o = createObject(OBJ_STRING,sdsnewlen(NULL, byte+1));
dbAdd(c->db,c->argv[1],o);
if (dirty) *dirty = 1;
*strGrowSize = byte + 1;
*strOldSize = 0;
} else {
o = dbUnshareStringValue(c->db,c->argv[1],o);
size_t oldlen = sdslen(o->ptr);
*strOldSize = sdslen(o->ptr);
o->ptr = sdsgrowzero(o->ptr,byte+1);
if (dirty && oldlen != sdslen(o->ptr)) *dirty = 1;
*strGrowSize = sdslen(o->ptr) - *strOldSize;
}
return o;
}
@ -561,8 +566,9 @@ void setbitCommand(client *c) {
return;
}
int dirty;
if ((o = lookupStringForBitCommand(c,bitoffset,&dirty)) == NULL) return;
size_t strOldSize, strGrowSize;
if ((o = lookupStringForBitCommand(c,bitoffset,&strOldSize,&strGrowSize)) == NULL)
return;
/* Get current values */
byte = bitoffset >> 3;
@ -573,7 +579,7 @@ void setbitCommand(client *c) {
/* Either it is newly created, changed length, or the bit changes before and after.
* Note that the bitval here is actually a decimal number.
* So we need to use `!!` to convert it to 0 or 1 for comparison. */
if (dirty || (!!bitval != on)) {
if (strGrowSize || (!!bitval != on)) {
/* Update byte with new bit value. */
byteval &= ~(1 << bit);
byteval |= ((on & 0x1) << bit);
@ -581,6 +587,13 @@ void setbitCommand(client *c) {
signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id);
server.dirty++;
/* If this is not a new key (old size not 0) and size changed, then
* update the keysizes histogram. Otherwise, the histogram already
* updated in lookupStringForBitCommand() by calling dbAdd(). */
if ((strOldSize > 0) && (strGrowSize != 0))
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_STRING,
strOldSize, strOldSize + strGrowSize);
}
/* Return original value. */
@ -1065,7 +1078,8 @@ struct bitfieldOp {
void bitfieldGeneric(client *c, int flags) {
robj *o;
uint64_t bitoffset;
int j, numops = 0, changes = 0, dirty = 0;
int j, numops = 0, changes = 0;
size_t strOldSize, strGrowSize = 0;
struct bitfieldOp *ops = NULL; /* Array of ops to execute at end. */
int owtype = BFOVERFLOW_WRAP; /* Overflow type. */
int readonly = 1;
@ -1159,7 +1173,7 @@ void bitfieldGeneric(client *c, int flags) {
/* Lookup by making room up to the farthest bit reached by
* this operation. */
if ((o = lookupStringForBitCommand(c,
highest_write_offset,&dirty)) == NULL) {
highest_write_offset,&strOldSize,&strGrowSize)) == NULL) {
zfree(ops);
return;
}
@ -1209,7 +1223,7 @@ void bitfieldGeneric(client *c, int flags) {
setSignedBitfield(o->ptr,thisop->offset,
thisop->bits,newval);
if (dirty || (oldval != newval))
if (strGrowSize || (oldval != newval))
changes++;
} else {
addReplyNull(c);
@ -1243,7 +1257,7 @@ void bitfieldGeneric(client *c, int flags) {
setUnsignedBitfield(o->ptr,thisop->offset,
thisop->bits,newval);
if (dirty || (oldval != newval))
if (strGrowSize || (oldval != newval))
changes++;
} else {
addReplyNull(c);
@ -1286,6 +1300,14 @@ void bitfieldGeneric(client *c, int flags) {
}
if (changes) {
/* If this is not a new key (old size not 0) and size changed, then
* update the keysizes histogram. Otherwise, the histogram already
* updated in lookupStringForBitCommand() by calling dbAdd(). */
if ((strOldSize > 0) && (strGrowSize != 0))
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_STRING,
strOldSize, strOldSize + strGrowSize);
signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_STRING,"setbit",c->argv[1],c->db->id);
server.dirty += changes;

View File

@ -533,7 +533,7 @@ CallReply *callReplyCreateError(sds reply, void *private_data) {
sdsfree(reply);
}
list *deferred_error_list = listCreate();
listSetFreeMethod(deferred_error_list, (void (*)(void*))sdsfree);
listSetFreeMethod(deferred_error_list, sdsfreegeneric);
listAddNodeTail(deferred_error_list, sdsnew(err_buff));
return callReplyCreate(err_buff, deferred_error_list, private_data);
}

View File

@ -317,7 +317,7 @@ migrateCachedSocket* migrateGetSocket(client *c, robj *host, robj *port, long ti
}
/* Create the connection */
conn = connCreate(connTypeOfCluster());
conn = connCreate(server.el, connTypeOfCluster());
if (connBlockingConnect(conn, host->ptr, atoi(port->ptr), timeout)
!= C_OK) {
addReplyError(c,"-IOERR error or timeout connecting to the client");

View File

@ -1262,7 +1262,7 @@ void clusterAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask) {
return;
}
connection *conn = connCreateAccepted(connTypeOfCluster(), cfd, &require_auth);
connection *conn = connCreateAccepted(server.el, connTypeOfCluster(), cfd, &require_auth);
/* Make sure connection is not in an error state */
if (connGetState(conn) != CONN_STATE_ACCEPTING) {
@ -4583,7 +4583,7 @@ static int clusterNodeCronHandleReconnect(clusterNode *node, mstime_t handshake_
if (node->link == NULL) {
clusterLink *link = createClusterLink(node);
link->conn = connCreate(connTypeOfCluster());
link->conn = connCreate(server.el, connTypeOfCluster());
connSetPrivateData(link->conn, link);
if (connConnect(link->conn, node->ip, node->cport, server.bind_source_addr,
clusterLinkConnectHandler) == C_ERR) {

View File

@ -1239,6 +1239,9 @@ commandHistory CLIENT_LIST_History[] = {
{"6.2.0","Added `argv-mem`, `tot-mem`, `laddr` and `redir` fields and the optional `ID` filter."},
{"7.0.0","Added `resp`, `multi-mem`, `rbs` and `rbp` fields."},
{"7.0.3","Added `ssub` field."},
{"7.2.0","Added `lib-name` and `lib-ver` fields."},
{"7.4.0","Added `watch` field."},
{"8.0.0","Added `io-thread` field."},
};
#endif
@ -1546,7 +1549,7 @@ struct COMMAND_STRUCT CLIENT_Subcommands[] = {
{MAKE_CMD("id","Returns the unique client ID of the connection.","O(1)","5.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_ID_History,0,CLIENT_ID_Tips,0,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_ID_Keyspecs,0,NULL,0)},
{MAKE_CMD("info","Returns information about the connection.","O(1)","6.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_INFO_History,0,CLIENT_INFO_Tips,1,clientCommand,2,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_INFO_Keyspecs,0,NULL,0)},
{MAKE_CMD("kill","Terminates open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_KILL_History,6,CLIENT_KILL_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_KILL_Keyspecs,0,NULL,1),.args=CLIENT_KILL_Args},
{MAKE_CMD("list","Lists open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_LIST_History,6,CLIENT_LIST_Tips,1,clientCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_LIST_Keyspecs,0,NULL,2),.args=CLIENT_LIST_Args},
{MAKE_CMD("list","Lists open connections.","O(N) where N is the number of client connections","2.4.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_LIST_History,9,CLIENT_LIST_Tips,1,clientCommand,-2,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_LIST_Keyspecs,0,NULL,2),.args=CLIENT_LIST_Args},
{MAKE_CMD("no-evict","Sets the client eviction mode of the connection.","O(1)","7.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_EVICT_History,0,CLIENT_NO_EVICT_Tips,0,clientCommand,3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_NO_EVICT_Keyspecs,0,NULL,1),.args=CLIENT_NO_EVICT_Args},
{MAKE_CMD("no-touch","Controls whether commands sent by the client affect the LRU/LFU of accessed keys.","O(1)","7.2.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_NO_TOUCH_History,0,CLIENT_NO_TOUCH_Tips,0,clientCommand,3,CMD_NOSCRIPT|CMD_LOADING|CMD_STALE,ACL_CATEGORY_CONNECTION,CLIENT_NO_TOUCH_Keyspecs,0,NULL,1),.args=CLIENT_NO_TOUCH_Args},
{MAKE_CMD("pause","Suspends commands processing.","O(1)","3.0.0",CMD_DOC_NONE,NULL,NULL,"connection",COMMAND_GROUP_CONNECTION,CLIENT_PAUSE_History,1,CLIENT_PAUSE_Tips,0,clientCommand,-3,CMD_ADMIN|CMD_NOSCRIPT|CMD_LOADING|CMD_STALE|CMD_SENTINEL,ACL_CATEGORY_CONNECTION,CLIENT_PAUSE_Keyspecs,0,NULL,2),.args=CLIENT_PAUSE_Args},

View File

@ -31,6 +31,18 @@
[
"7.0.3",
"Added `ssub` field."
],
[
"7.2.0",
"Added `lib-name` and `lib-ver` fields."
],
[
"7.4.0",
"Added `watch` field."
],
[
"8.0.0",
"Added `io-thread` field."
]
],
"command_flags": [

View File

@ -44,6 +44,9 @@
"lua.caches": {
"type": "integer"
},
"script.VMs": {
"type": "integer"
},
"functions.caches": {
"type": "integer"
},

View File

@ -3,6 +3,9 @@
* Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved.
*
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
*
@ -268,7 +271,7 @@ dict *configs = NULL; /* Runtime config values */
/* Lookup a config by the provided sds string name, or return NULL
* if the config does not exist */
static standardConfig *lookupConfig(sds name) {
static standardConfig *lookupConfig(const sds name) {
dictEntry *de = dictFind(configs, name);
return de ? dictGetVal(de) : NULL;
}
@ -430,6 +433,7 @@ void loadServerConfigFromString(char *config) {
{"list-max-ziplist-entries", 2, 2},
{"list-max-ziplist-value", 2, 2},
{"lua-replicate-commands", 2, 2},
{"io-threads-do-reads", 2, 2},
{NULL, 0},
};
char buf[1024];
@ -552,16 +556,6 @@ void loadServerConfigFromString(char *config) {
}
} else if (!strcasecmp(argv[0],"loadmodule") && argc >= 2) {
queueLoadModule(argv[1],&argv[2],argc-2);
} else if (strchr(argv[0], '.')) {
if (argc < 2) {
err = "Module config specified without value";
goto loaderr;
}
sds name = sdsdup(argv[0]);
sds val = sdsdup(argv[1]);
for (int i = 2; i < argc; i++)
val = sdscatfmt(val, " %S", argv[i]);
if (!dictReplace(server.module_configs_queue, name, val)) sdsfree(name);
} else if (!strcasecmp(argv[0],"sentinel")) {
/* argc == 1 is handled by main() as we need to enter the sentinel
* mode ASAP. */
@ -573,7 +567,20 @@ void loadServerConfigFromString(char *config) {
queueSentinelConfig(argv+1,argc-1,linenum,lines[i]);
}
} else {
err = "Bad directive or wrong number of arguments"; goto loaderr;
/* Collect all unknown configurations into `module_configs_queue`.
* These may include valid module configurations or invalid ones.
* They will be validated later by loadModuleConfigs() against the
* configurations declared by the loaded module(s). */
if (argc < 2) {
err = "Bad directive or wrong number of arguments";
goto loaderr;
}
sds name = sdsdup(argv[0]);
sds val = sdsdup(argv[1]);
for (int i = 2; i < argc; i++)
val = sdscatfmt(val, " %S", argv[i]);
if (!dictReplace(server.module_configs_queue, name, val)) sdsfree(name);
}
sdsfreesplitres(argv,argc);
argv = NULL;
@ -2547,11 +2554,10 @@ static int updateMaxclients(const char **err) {
*err = msg;
return 0;
}
if ((unsigned int) aeGetSetSize(server.el) <
server.maxclients + CONFIG_FDSET_INCR)
{
if (aeResizeSetSize(server.el,
server.maxclients + CONFIG_FDSET_INCR) == AE_ERR)
size_t newsize = server.maxclients + CONFIG_FDSET_INCR;
if ((unsigned int) aeGetSetSize(server.el) < newsize) {
if (aeResizeSetSize(server.el, newsize) == AE_ERR ||
resizeAllIOThreadsEventLoops(newsize) == AE_ERR)
{
*err = "The event loop API used by Redis is not able to handle the specified number of clients";
return 0;
@ -3032,6 +3038,7 @@ static int applyClientMaxMemoryUsage(const char **err) {
if (server.maxmemory_clients != 0)
initServerClientMemUsageBuckets();
pauseAllIOThreads();
/* When client eviction is enabled update memory buckets for all clients.
* When disabled, clear that data structure. */
listRewind(server.clients, &li);
@ -3045,6 +3052,7 @@ static int applyClientMaxMemoryUsage(const char **err) {
updateClientMemUsageAndBucket(c);
}
}
resumeAllIOThreads();
if (server.maxmemory_clients == 0)
freeServerClientMemUsageBuckets();
@ -3071,6 +3079,7 @@ standardConfig static_configs[] = {
createBoolConfig("lazyfree-lazy-user-flush", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.lazyfree_lazy_user_flush , 0, NULL, NULL),
createBoolConfig("repl-disable-tcp-nodelay", NULL, MODIFIABLE_CONFIG, server.repl_disable_tcp_nodelay, 0, NULL, NULL),
createBoolConfig("repl-diskless-sync", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, server.repl_diskless_sync, 1, NULL, NULL),
createBoolConfig("repl-rdb-channel", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, server.repl_rdb_channel, 1, NULL, NULL),
createBoolConfig("aof-rewrite-incremental-fsync", NULL, MODIFIABLE_CONFIG, server.aof_rewrite_incremental_fsync, 1, NULL, NULL),
createBoolConfig("no-appendfsync-on-rewrite", NULL, MODIFIABLE_CONFIG, server.aof_no_fsync_on_rewrite, 0, NULL, NULL),
createBoolConfig("cluster-require-full-coverage", NULL, MODIFIABLE_CONFIG, server.cluster_require_full_coverage, 1, NULL, NULL),
@ -3213,6 +3222,7 @@ standardConfig static_configs[] = {
createLongLongConfig("proto-max-bulk-len", NULL, DEBUG_CONFIG | MODIFIABLE_CONFIG, 1024*1024, LONG_MAX, server.proto_max_bulk_len, 512ll*1024*1024, MEMORY_CONFIG, NULL, NULL), /* Bulk request max size */
createLongLongConfig("stream-node-max-entries", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.stream_node_max_entries, 100, INTEGER_CONFIG, NULL, NULL),
createLongLongConfig("repl-backlog-size", NULL, MODIFIABLE_CONFIG, 1, LLONG_MAX, server.repl_backlog_size, 1024*1024, MEMORY_CONFIG, NULL, updateReplBacklogSize), /* Default: 1mb */
createLongLongConfig("replica-full-sync-buffer-limit", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.repl_full_sync_buffer_limit, 0, MEMORY_CONFIG, NULL, NULL), /* Default: Inherits 'client-output-buffer-limit <replica>' */
/* Unsigned Long Long configs */
createULongLongConfig("maxmemory", NULL, MODIFIABLE_CONFIG, 0, ULLONG_MAX, server.maxmemory, 0, MEMORY_CONFIG, NULL, updateMaxmemory),
@ -3312,16 +3322,34 @@ void removeConfig(sds name) {
standardConfig *config = lookupConfig(name);
if (!config) return;
if (config->flags & MODULE_CONFIG) {
sdsfree((sds) config->name);
if (config->type == ENUM_CONFIG) {
sdsfree((sds) config->alias);
switch (config->type) {
case BOOL_CONFIG:
break;
case NUMERIC_CONFIG:
break;
case SDS_CONFIG:
if (config->data.sds.default_value)
sdsfree((sds)config->data.sds.default_value);
break;
case ENUM_CONFIG:
{
configEnum *enumNode = config->data.enumd.enum_value;
while(enumNode->name != NULL) {
zfree(enumNode->name);
enumNode++;
}
zfree(config->data.enumd.enum_value);
} else if (config->type == SDS_CONFIG) {
if (config->data.sds.default_value) sdsfree((sds)config->data.sds.default_value);
}
break;
case SPECIAL_CONFIG: /* Not used by modules */
case STRING_CONFIG: /* Not used by modules */
default:
serverAssert(0);
break;
}
}
dictDelete(configs, name);
@ -3332,40 +3360,77 @@ void removeConfig(sds name) {
*----------------------------------------------------------------------------*/
/* Create a bool/string/enum/numeric standardConfig for a module config in the configs dictionary */
void addModuleBoolConfig(const char *module_name, const char *name, int flags, void *privdata, int default_val) {
sds config_name = sdscatfmt(sdsempty(), "%s.%s", module_name, name);
/* On removeConfig(), name and alias will be sdsfree() */
void addModuleBoolConfig(sds name, sds alias, int flags, void *privdata, int default_val) {
int config_dummy_address;
standardConfig module_config = createBoolConfig(config_name, NULL, flags | MODULE_CONFIG, config_dummy_address, default_val, NULL, NULL);
module_config.data.yesno.config = NULL;
module_config.privdata = privdata;
registerConfigValue(config_name, &module_config, 0);
standardConfig sc = createBoolConfig(name, alias, flags | MODULE_CONFIG, config_dummy_address, default_val, NULL, NULL);
sc.data.yesno.config = NULL;
sc.privdata = privdata;
registerConfigValue(name, &sc, 0);
/* If alias available, deep copy standardConfig and register again */
if (alias) {
sc.name = sdsdup(name);
sc.alias = sdsdup(alias);
registerConfigValue(sc.alias, &sc, 1);
}
}
void addModuleStringConfig(const char *module_name, const char *name, int flags, void *privdata, sds default_val) {
sds config_name = sdscatfmt(sdsempty(), "%s.%s", module_name, name);
/* On removeConfig(), name, default_val, and alias will be sdsfree() */
void addModuleStringConfig(sds name, sds alias, int flags, void *privdata, sds default_val) {
sds config_dummy_address;
standardConfig module_config = createSDSConfig(config_name, NULL, flags | MODULE_CONFIG, 0, config_dummy_address, default_val, NULL, NULL);
module_config.data.sds.config = NULL;
module_config.privdata = privdata;
registerConfigValue(config_name, &module_config, 0);
standardConfig sc = createSDSConfig(name, alias, flags | MODULE_CONFIG, 0, config_dummy_address, default_val, NULL, NULL);
sc.data.sds.config = NULL;
sc.privdata = privdata;
registerConfigValue(name, &sc, 0); /* memcpy sc */
/* If alias available, deep copy standardConfig and register again */
if (alias) {
sc.name = sdsdup(name);
sc.alias = sdsdup(alias);
if (default_val) sc.data.sds.default_value = sdsdup(default_val);
registerConfigValue(sc.alias, &sc, 1);
}
}
void addModuleEnumConfig(const char *module_name, const char *name, int flags, void *privdata, int default_val, configEnum *enum_vals) {
sds config_name = sdscatfmt(sdsempty(), "%s.%s", module_name, name);
/* On removeConfig(), name, default_val, alias and enum_vals will be freed */
void addModuleEnumConfig(sds name, sds alias, int flags, void *privdata, int default_val, configEnum *enum_vals, int num_enum_vals) {
int config_dummy_address;
standardConfig module_config = createEnumConfig(config_name, NULL, flags | MODULE_CONFIG, enum_vals, config_dummy_address, default_val, NULL, NULL);
module_config.data.enumd.config = NULL;
module_config.privdata = privdata;
registerConfigValue(config_name, &module_config, 0);
standardConfig sc = createEnumConfig(name, alias, flags | MODULE_CONFIG, enum_vals, config_dummy_address, default_val, NULL, NULL);
sc.data.enumd.config = NULL;
sc.privdata = privdata;
registerConfigValue(name, &sc, 0);
/* If alias available, deep copy standardConfig and register again */
if (alias) {
sc.name = sdsdup(name);
sc.alias = sdsdup(alias);
sc.data.enumd.enum_value = zmalloc((num_enum_vals + 1) * sizeof(configEnum));
for (int i = 0; i < num_enum_vals; i++) {
sc.data.enumd.enum_value[i].name = zstrdup(enum_vals[i].name);
sc.data.enumd.enum_value[i].val = enum_vals[i].val;
}
sc.data.enumd.enum_value[num_enum_vals].name = NULL;
sc.data.enumd.enum_value[num_enum_vals].val = 0;
registerConfigValue(sc.alias, &sc, 1);
}
}
void addModuleNumericConfig(const char *module_name, const char *name, int flags, void *privdata, long long default_val, int conf_flags, long long lower, long long upper) {
sds config_name = sdscatfmt(sdsempty(), "%s.%s", module_name, name);
/* On removeConfig(), it will free name, and alias if it is not NULL */
void addModuleNumericConfig(sds name, sds alias, int flags, void *privdata, long long default_val, int conf_flags, long long lower, long long upper) {
long long config_dummy_address;
standardConfig module_config = createLongLongConfig(config_name, NULL, flags | MODULE_CONFIG, lower, upper, config_dummy_address, default_val, conf_flags, NULL, NULL);
module_config.data.numeric.config.ll = NULL;
module_config.privdata = privdata;
registerConfigValue(config_name, &module_config, 0);
standardConfig sc = createLongLongConfig(name, alias, flags | MODULE_CONFIG, lower, upper, config_dummy_address, default_val, conf_flags, NULL, NULL);
sc.data.numeric.config.ll = NULL;
sc.privdata = privdata;
registerConfigValue(name, &sc, 0);
/* If alias available, deep copy standardConfig and register again */
if (alias) {
sc.name = sdsdup(name);
sc.alias = sdsdup(alias);
registerConfigValue(sc.alias, &sc, 1);
}
}
/*-----------------------------------------------------------------------------
@ -3418,3 +3483,7 @@ void configRewriteCommand(client *c) {
addReply(c,shared.ok);
}
}
int configExists(const sds name) {
return lookupConfig(name) != NULL;
}

View File

@ -47,6 +47,7 @@
#define HAVE_PROC_SMAPS 1
#define HAVE_PROC_SOMAXCONN 1
#define HAVE_PROC_OOM_SCORE_ADJ 1
#define HAVE_EVENT_FD 1
#endif
/* Test for task_info() */
@ -101,6 +102,25 @@
#endif
#endif
/* Test for __builtin_prefetch()
* Supported in LLVM since 2.9: https://releases.llvm.org/2.9/docs/ReleaseNotes.html
* Supported in GCC since 3.1 but we use 4.9 given it's too old: https://gcc.gnu.org/gcc-3.1/changes.html. */
#if defined(__clang__) && (__clang_major__ > 2 || (__clang_major__ == 2 && __clang_minor__ >= 9))
#define HAS_BUILTIN_PREFETCH 1
#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9))
#define HAS_BUILTIN_PREFETCH 1
#else
#define HAS_BUILTIN_PREFETCH 0
#endif
#if HAS_BUILTIN_PREFETCH
#define redis_prefetch_read(addr) __builtin_prefetch(addr, 0, 3) /* Read with high locality */
#define redis_prefetch_write(addr) __builtin_prefetch(addr, 1, 3) /* Write with high locality */
#else
#define redis_prefetch_read(addr) ((void)(addr)) /* No-op if unsupported */
#define redis_prefetch_write(addr) ((void)(addr)) /* No-op if unsupported */
#endif
/* Define redis_fsync to fdatasync() in Linux and fsync() for all the rest */
#if defined(__linux__)
#define redis_fsync(fd) fdatasync(fd)
@ -318,4 +338,17 @@ void setcpuaffinity(const char *cpulist);
#define ATTRIBUTE_TARGET_POPCNT
#endif
/* Check if we can compile AVX2 code */
#if defined (__x86_64__) && ((defined(__GNUC__) && __GNUC__ >= 5) || (defined(__clang__) && __clang_major__ >= 4))
#if defined(__has_attribute) && __has_attribute(target)
#define HAVE_AVX2
#endif
#endif
#if defined (HAVE_AVX2)
#define ATTRIBUTE_TARGET_AVX2 __attribute__((target("avx2")))
#else
#define ATTRIBUTE_TARGET_AVX2
#endif
#endif

View File

@ -156,14 +156,14 @@ void connTypeCleanupAll(void) {
}
/* walk all the connection types until has pending data */
int connTypeHasPendingData(void) {
int connTypeHasPendingData(struct aeEventLoop *el) {
ConnectionType *ct;
int type;
int ret = 0;
for (type = 0; type < CONN_TYPE_MAX; type++) {
ct = connTypes[type];
if (ct && ct->has_pending_data && (ret = ct->has_pending_data())) {
if (ct && ct->has_pending_data && (ret = ct->has_pending_data(el))) {
return ret;
}
}
@ -172,7 +172,7 @@ int connTypeHasPendingData(void) {
}
/* walk all the connection types and process pending data for each connection type */
int connTypeProcessPendingData(void) {
int connTypeProcessPendingData(struct aeEventLoop *el) {
ConnectionType *ct;
int type;
int ret = 0;
@ -180,7 +180,7 @@ int connTypeProcessPendingData(void) {
for (type = 0; type < CONN_TYPE_MAX; type++) {
ct = connTypes[type];
if (ct && ct->process_pending_data) {
ret += ct->process_pending_data();
ret += ct->process_pending_data(el);
}
}

View File

@ -60,8 +60,8 @@ typedef struct ConnectionType {
int (*listen)(connListener *listener);
/* create/shutdown/close connection */
connection* (*conn_create)(void);
connection* (*conn_create_accepted)(int fd, void *priv);
connection* (*conn_create)(struct aeEventLoop *el);
connection* (*conn_create_accepted)(struct aeEventLoop *el, int fd, void *priv);
void (*shutdown)(struct connection *conn);
void (*close)(struct connection *conn);
@ -81,9 +81,13 @@ typedef struct ConnectionType {
ssize_t (*sync_read)(struct connection *conn, char *ptr, ssize_t size, long long timeout);
ssize_t (*sync_readline)(struct connection *conn, char *ptr, ssize_t size, long long timeout);
/* event loop */
void (*unbind_event_loop)(struct connection *conn);
int (*rebind_event_loop)(struct connection *conn, aeEventLoop *el);
/* pending data */
int (*has_pending_data)(void);
int (*process_pending_data)(void);
int (*has_pending_data)(struct aeEventLoop *el);
int (*process_pending_data)(struct aeEventLoop *el);
/* TLS specified methods */
sds (*get_peer_cert)(struct connection *conn);
@ -98,6 +102,7 @@ struct connection {
short int refs;
unsigned short int iovcnt;
void *private_data;
struct aeEventLoop *el;
ConnectionCallbackFunc conn_handler;
ConnectionCallbackFunc write_handler;
ConnectionCallbackFunc read_handler;
@ -319,6 +324,28 @@ static inline int connHasReadHandler(connection *conn) {
return conn->read_handler != NULL;
}
/* Returns true if the connection is bound to an event loop */
static inline int connHasEventLoop(connection *conn) {
return conn->el != NULL;
}
/* Unbind the current event loop from the connection, so that it can be
* rebind to a different event loop in the future. */
static inline void connUnbindEventLoop(connection *conn) {
if (conn->el == NULL) return;
connSetReadHandler(conn, NULL);
connSetWriteHandler(conn, NULL);
if (conn->type->unbind_event_loop)
conn->type->unbind_event_loop(conn);
conn->el = NULL;
}
/* Rebind the connection to another event loop, read/write handlers must not
* be installed in the current event loop */
static inline int connRebindEventLoop(connection *conn, aeEventLoop *el) {
return conn->type->rebind_event_loop(conn, el);
}
/* Associate a private data pointer with the connection */
static inline void connSetPrivateData(connection *conn, void *data) {
conn->private_data = data;
@ -379,14 +406,14 @@ ConnectionType *connectionTypeUnix(void);
int connectionIndexByType(const char *typename);
/* Create a connection of specified type */
static inline connection *connCreate(ConnectionType *ct) {
return ct->conn_create();
static inline connection *connCreate(struct aeEventLoop *el, ConnectionType *ct) {
return ct->conn_create(el);
}
/* Create an accepted connection of specified type.
* priv is connection type specified argument */
static inline connection *connCreateAccepted(ConnectionType *ct, int fd, void *priv) {
return ct->conn_create_accepted(fd, priv);
static inline connection *connCreateAccepted(struct aeEventLoop *el, ConnectionType *ct, int fd, void *priv) {
return ct->conn_create_accepted(el, fd, priv);
}
/* Configure a connection type. A typical case is to configure TLS.
@ -400,10 +427,10 @@ static inline int connTypeConfigure(ConnectionType *ct, void *priv, int reconfig
void connTypeCleanupAll(void);
/* Test all the connection type has pending data or not. */
int connTypeHasPendingData(void);
int connTypeHasPendingData(struct aeEventLoop *el);
/* walk all the connection types and process pending data for each connection type */
int connTypeProcessPendingData(void);
int connTypeProcessPendingData(struct aeEventLoop *el);
/* Listen on an initialized listener */
static inline int connListen(connListener *listener) {

View File

@ -26,8 +26,11 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE. */
#include <stdlib.h>
#include "crc64.h"
#include "crcspeed.h"
#include "redisassert.h"
#include "testhelp.h"
static uint64_t crc64_table[8][256] = {{0}};
#define POLY UINT64_C(0xad93d23594c935a9)
@ -67,14 +70,33 @@ static uint64_t crc64_table[8][256] = {{0}};
* \return The reflected data.
*****************************************************************************/
static inline uint_fast64_t crc_reflect(uint_fast64_t data, size_t data_len) {
uint_fast64_t ret = data & 0x01;
/* only ever called for data_len == 64 in this codebase
*
* Borrowed from bit twiddling hacks, original in the public domain.
* https://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
* Extended to 64 bits, and added byteswap for final 3 steps.
* 16-30x 64-bit operations, no comparisons (16 for native byteswap, 30 for pure C)
*/
for (size_t i = 1; i < data_len; i++) {
data >>= 1;
ret = (ret << 1) | (data & 0x01);
}
return ret;
assert(data_len <= 64);
/* swap odd and even bits */
data = ((data >> 1) & 0x5555555555555555ULL) | ((data & 0x5555555555555555ULL) << 1);
/* swap consecutive pairs */
data = ((data >> 2) & 0x3333333333333333ULL) | ((data & 0x3333333333333333ULL) << 2);
/* swap nibbles ... */
data = ((data >> 4) & 0x0F0F0F0F0F0F0F0FULL) | ((data & 0x0F0F0F0F0F0F0F0FULL) << 4);
#if defined(__GNUC__) || defined(__clang__)
data = __builtin_bswap64(data);
#else
/* swap bytes */
data = ((data >> 8) & 0x00FF00FF00FF00FFULL) | ((data & 0x00FF00FF00FF00FFULL) << 8);
/* swap 2-byte long pairs */
data = ( data >> 16 & 0xFFFF0000FFFFULL) | ((data & 0xFFFF0000FFFFULL) << 16);
/* swap 4-byte quads */
data = ( data >> 32 & 0xFFFFFFFFULL) | ((data & 0xFFFFFFFFULL) << 32);
#endif
/* adjust for non-64-bit reversals */
return data >> (64 - data_len);
}
/**
@ -126,11 +148,121 @@ uint64_t crc64(uint64_t crc, const unsigned char *s, uint64_t l) {
#ifdef REDIS_TEST
#include <stdio.h>
static void genBenchmarkRandomData(char *data, int count);
static int bench_crc64(unsigned char *data, uint64_t size, long long passes, uint64_t check, char *name, int csv);
static void bench_combine(char *label, uint64_t size, uint64_t expect, int csv);
long long _ustime(void);
#include <inttypes.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <sys/time.h>
#include <unistd.h>
#include "zmalloc.h"
#include "crccombine.h"
long long _ustime(void) {
struct timeval tv;
long long ust;
gettimeofday(&tv, NULL);
ust = ((long long)tv.tv_sec)*1000000;
ust += tv.tv_usec;
return ust;
}
static int bench_crc64(unsigned char *data, uint64_t size, long long passes, uint64_t check, char *name, int csv) {
uint64_t min = size, hash;
long long original_start = _ustime(), original_end;
for (long long i=passes; i > 0; i--) {
hash = crc64(0, data, size);
}
original_end = _ustime();
min = (original_end - original_start) * 1000 / passes;
/* approximate nanoseconds without nstime */
if (csv) {
printf("%s,%" PRIu64 ",%" PRIu64 ",%d\n",
name, size, (1000 * size) / min, hash == check);
} else {
printf("test size=%" PRIu64 " algorithm=%s %" PRIu64 " M/sec matches=%d\n",
size, name, (1000 * size) / min, hash == check);
}
return hash != check;
}
const uint64_t BENCH_RPOLY = UINT64_C(0x95ac9329ac4bc9b5);
static void bench_combine(char *label, uint64_t size, uint64_t expect, int csv) {
uint64_t min = size, start = expect, thash = expect ^ (expect >> 17);
long long original_start = _ustime(), original_end;
for (int i=0; i < 1000; i++) {
crc64_combine(thash, start, size, BENCH_RPOLY, 64);
}
original_end = _ustime();
/* ran 1000 times, want ns per, counted us per 1000 ... */
min = original_end - original_start;
if (csv) {
printf("%s,%" PRIu64 ",%" PRIu64 "\n", label, size, min);
} else {
printf("%s size=%" PRIu64 " in %" PRIu64 " nsec\n", label, size, min);
}
}
static void genBenchmarkRandomData(char *data, int count) {
static uint32_t state = 1234;
int i = 0;
while (count--) {
state = (state*1103515245+12345);
data[i++] = '0'+((state>>16)&63);
}
}
#define UNUSED(x) (void)(x)
int crc64Test(int argc, char *argv[], int flags) {
UNUSED(argc);
UNUSED(argv);
UNUSED(flags);
uint64_t crc64_test_size = 0;
int i, lastarg, csv = 0, loop = 0, combine = 0, testAll = 0;
again:
if ((argc>=4) && (!strcmp(argv[3],"custom"))) {
for (i = 4; i < argc; i++) {
lastarg = (i == (argc - 1));
if (!strcmp(argv[i], "--help")) {
goto usage;
} else if (!strcmp(argv[i], "--csv")) {
csv = 1;
} else if (!strcmp(argv[i], "-l")) {
loop = 1;
} else if (!strcmp(argv[i], "--crc")) {
if (lastarg) goto invalid;
crc64_test_size = atoll(argv[++i]);
} else if (!strcmp(argv[i], "--combine")) {
combine = 1;
} else {
invalid:
printf("Invalid option \"%s\" or option argument missing\n\n",
argv[i]);
usage:
printf(
"Usage: crc64 [OPTIONS]\n\n"
" --csv Output in CSV format\n"
" -l Loop. Run the tests forever\n"
" --crc <bytes> Benchmark crc64 faster options, using a buffer this big, and quit when done.\n"
" --combine Benchmark crc64 combine value ranges and timings.\n"
);
return 1;
}
}
} else {
crc64_test_size = 50000;
testAll = 1;
if (flags & REDIS_TEST_ACCURATE) crc64_test_size = 5000000;
}
if ((crc64_test_size == 0 && combine == 0) || testAll) {
crc64_init();
printf("[calcula]: e9c6d914c4b8d9ca == %016" PRIx64 "\n",
(uint64_t)_crc64(0, "123456789", 9));
@ -148,14 +280,89 @@ int crc64Test(int argc, char *argv[], int flags) {
(uint64_t)_crc64(0, li, sizeof(li)));
printf("[64speed]: c7794709e69683b3 == %016" PRIx64 "\n",
(uint64_t)crc64(0, (unsigned char*)li, sizeof(li)));
if (!testAll) return 0;
}
int init_this_loop = 1;
long long init_start, init_end;
do {
unsigned char* data = NULL;
uint64_t passes = 0;
if (crc64_test_size) {
data = zmalloc(crc64_test_size);
genBenchmarkRandomData((char*)data, crc64_test_size);
/* We want to hash about 1 gig of data in total, looped, to get a good
* idea of our performance.
*/
passes = (UINT64_C(0x100000000) / crc64_test_size);
passes = passes >= 2 ? passes : 2;
passes = passes <= 1000 ? passes : 1000;
}
crc64_init();
/* warm up the cache */
set_crc64_cutoffs(crc64_test_size+1, crc64_test_size+1);
uint64_t expect = crc64(0, data, crc64_test_size);
if ((!combine || testAll) && crc64_test_size) {
if (csv && init_this_loop) printf("algorithm,buffer,performance,crc64_matches\n");
/* get the single-character version for single-byte Redis behavior */
set_crc64_cutoffs(0, crc64_test_size+1);
assert(!bench_crc64(data, crc64_test_size, passes, expect, "crc_1byte", csv));
set_crc64_cutoffs(crc64_test_size+1, crc64_test_size+1);
/* run with 8-byte "single" path, crcfaster */
assert(!(bench_crc64(data, crc64_test_size, passes, expect, "crcspeed", csv)));
/* run with dual 8-byte paths */
set_crc64_cutoffs(1, crc64_test_size+1);
assert(!(bench_crc64(data, crc64_test_size, passes, expect, "crcdual", csv)));
/* run with tri 8-byte paths */
set_crc64_cutoffs(1, 1);
assert(!(bench_crc64(data, crc64_test_size, passes, expect, "crctri", csv)));
/* Be free memory region, be free. */
zfree(data);
data = NULL;
}
uint64_t INIT_SIZE = UINT64_C(0xffffffffffffffff);
if (combine || testAll) {
if (init_this_loop) {
init_start = _ustime();
crc64_combine(
UINT64_C(0xdeadbeefdeadbeef),
UINT64_C(0xfeebdaedfeebdaed),
INIT_SIZE,
BENCH_RPOLY, 64);
init_end = _ustime();
init_end -= init_start;
init_end *= 1000;
if (csv) {
printf("operation,size,nanoseconds\n");
printf("init_64,%" PRIu64 ",%" PRIu64 "\n", INIT_SIZE, (uint64_t)init_end);
} else {
printf("init_64 size=%" PRIu64 " in %" PRIu64 " nsec\n", INIT_SIZE, (uint64_t)init_end);
}
/* use the hash itself as the size (unpredictable) */
bench_combine("hash_as_size_combine", crc64_test_size, expect, csv);
/* let's do something big (predictable, so fast) */
bench_combine("largest_combine", INIT_SIZE, expect, csv);
}
bench_combine("combine", crc64_test_size, expect, csv);
}
init_this_loop = 0;
/* step down by ~1.641 for a range of test sizes */
crc64_test_size -= (crc64_test_size >> 2) + (crc64_test_size >> 3) + (crc64_test_size >> 6);
} while (crc64_test_size > 3);
if (loop) goto again;
return 0;
}
#endif
#ifdef REDIS_TEST_MAIN
int main(int argc, char *argv[]) {
return crc64Test(argc, argv);
}
#endif

252
src/crccombine.c Normal file
View File

@ -0,0 +1,252 @@
#include <stdint.h>
#include <stdio.h>
#include <strings.h>
#if defined(__i386__) || defined(__X86_64__)
#include <immintrin.h>
#endif
#include "crccombine.h"
/* Copyright (C) 2013 Mark Adler
* Copyright (C) 2019-2024 Josiah Carlson
* Portions originally from: crc64.c Version 1.4 16 Dec 2013 Mark Adler
* Modifications by Josiah Carlson <josiah.carlson@gmail.com>
* - Added implementation variations with sample timings for gf_matrix_times*()
* - Most folks would be best using gf2_matrix_times_vec or
* gf2_matrix_times_vec2, unless some processor does AVX2 fast.
* - This is the implementation of the MERGE_CRC macro defined in
* crcspeed.c (which calls crc_combine()), and is a specialization of the
* generic crc_combine() (and related from the 2013 edition of Mark Adler's
* crc64.c)) for the sake of clarity and performance.
This software is provided 'as-is', without any express or implied
warranty. In no event will the author be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
Mark Adler
madler@alumni.caltech.edu
*/
#define STATIC_ASSERT(VVV) do {int test = 1 / (VVV);test++;} while (0)
#if !((defined(__i386__) || defined(__X86_64__)))
/* This cuts 40% of the time vs bit-by-bit. */
uint64_t gf2_matrix_times_switch(uint64_t *mat, uint64_t vec) {
/*
* Without using any vector math, this handles 4 bits at a time,
* and saves 40+% of the time compared to the bit-by-bit version. Use if you
* have no vector compile option available to you. With cache, we see:
* E5-2670 ~1-2us to extend ~1 meg 64 bit hash
*/
uint64_t sum;
sum = 0;
while (vec) {
/* reversing the case order is ~10% slower on Xeon E5-2670 */
switch (vec & 15) {
case 15:
sum ^= *mat ^ *(mat+1) ^ *(mat+2) ^ *(mat+3);
break;
case 14:
sum ^= *(mat+1) ^ *(mat+2) ^ *(mat+3);
break;
case 13:
sum ^= *mat ^ *(mat+2) ^ *(mat+3);
break;
case 12:
sum ^= *(mat+2) ^ *(mat+3);
break;
case 11:
sum ^= *mat ^ *(mat+1) ^ *(mat+3);
break;
case 10:
sum ^= *(mat+1) ^ *(mat+3);
break;
case 9:
sum ^= *mat ^ *(mat+3);
break;
case 8:
sum ^= *(mat+3);
break;
case 7:
sum ^= *mat ^ *(mat+1) ^ *(mat+2);
break;
case 6:
sum ^= *(mat+1) ^ *(mat+2);
break;
case 5:
sum ^= *mat ^ *(mat+2);
break;
case 4:
sum ^= *(mat+2);
break;
case 3:
sum ^= *mat ^ *(mat+1);
break;
case 2:
sum ^= *(mat+1);
break;
case 1:
sum ^= *mat;
break;
default:
break;
}
vec >>= 4;
mat += 4;
}
return sum;
}
#define CRC_MULTIPLY gf2_matrix_times_switch
#else
/*
Warning: here there be dragons involving vector math, and macros to save us
from repeating the same information over and over.
*/
uint64_t gf2_matrix_times_vec2(uint64_t *mat, uint64_t vec) {
/*
* Uses xmm registers on x86, works basically everywhere fast, doing
* cycles of movqda, mov, shr, pand, and, pxor, at least on gcc 8.
* Is 9-11x faster than original.
* E5-2670 ~29us to extend ~1 meg 64 bit hash
* i3-8130U ~22us to extend ~1 meg 64 bit hash
*/
v2uq sum = {0, 0},
*mv2 = (v2uq*)mat;
/* this table allows us to eliminate conditions during gf2_matrix_times_vec2() */
static v2uq masks2[4] = {
{0,0},
{-1,0},
{0,-1},
{-1,-1},
};
/* Almost as beautiful as gf2_matrix_times_vec, but only half as many
* bits per step, so we need 2 per chunk4 operation. Faster in my tests. */
#define DO_CHUNK4() \
sum ^= (*mv2++) & masks2[vec & 3]; \
vec >>= 2; \
sum ^= (*mv2++) & masks2[vec & 3]; \
vec >>= 2
#define DO_CHUNK16() \
DO_CHUNK4(); \
DO_CHUNK4(); \
DO_CHUNK4(); \
DO_CHUNK4()
DO_CHUNK16();
DO_CHUNK16();
DO_CHUNK16();
DO_CHUNK16();
STATIC_ASSERT(sizeof(uint64_t) == 8);
STATIC_ASSERT(sizeof(long long unsigned int) == 8);
return sum[0] ^ sum[1];
}
#undef DO_CHUNK16
#undef DO_CHUNK4
#define CRC_MULTIPLY gf2_matrix_times_vec2
#endif
static void gf2_matrix_square(uint64_t *square, uint64_t *mat, uint8_t dim) {
unsigned n;
for (n = 0; n < dim; n++)
square[n] = CRC_MULTIPLY(mat, mat[n]);
}
/* Turns out our Redis / Jones CRC cycles at this point, so we can support
* more than 64 bits of extension if we want. Trivially. */
static uint64_t combine_cache[64][64];
/* Mark Adler has some amazing updates to crc.c in his crcany repository. I
* like static caches, and not worrying about finding cycles generally. We are
* okay to spend the 32k of memory here, leaving the algorithm unchanged from
* as it was a decade ago, and be happy that it costs <200 microseconds to
* init, and that subsequent calls to the combine function take under 100
* nanoseconds. We also note that the crcany/crc.c code applies to any CRC, and
* we are currently targeting one: Jones CRC64.
*/
void init_combine_cache(uint64_t poly, uint8_t dim) {
unsigned n, cache_num = 0;
combine_cache[1][0] = poly;
int prev = 1;
uint64_t row = 1;
for (n = 1; n < dim; n++)
{
combine_cache[1][n] = row;
row <<= 1;
}
gf2_matrix_square(combine_cache[0], combine_cache[1], dim);
gf2_matrix_square(combine_cache[1], combine_cache[0], dim);
/* do/while to overwrite the first two layers, they are not used, but are
* re-generated in the last two layers for the Redis polynomial */
do {
gf2_matrix_square(combine_cache[cache_num], combine_cache[cache_num + prev], dim);
prev = -1;
} while (++cache_num < 64);
}
/* Return the CRC-64 of two sequential blocks, where crc1 is the CRC-64 of the
* first block, crc2 is the CRC-64 of the second block, and len2 is the length
* of the second block.
*
* If you want reflections on your CRCs; do them outside before / after.
* WARNING: if you enable USE_STATIC_COMBINE_CACHE to make this fast, you MUST
* ALWAYS USE THE SAME POLYNOMIAL, otherwise you will get the wrong results.
* You MAY bzero() the even/odd static arrays, which will induce a re-cache on
* next call as a work-around, but ... maybe just parameterize the cached
* models at that point like Mark Adler does in modern crcany/crc.c .
*/
uint64_t crc64_combine(uint64_t crc1, uint64_t crc2, uintmax_t len2, uint64_t poly, uint8_t dim) {
/* degenerate case */
if (len2 == 0)
return crc1;
unsigned cache_num = 0;
if (combine_cache[0][0] == 0) {
init_combine_cache(poly, dim);
}
/* apply len2 zeros to crc1 (first square will put the operator for one
zero byte, eight zero bits, in even) */
do
{
/* apply zeros operator for this bit of len2 */
if (len2 & 1)
crc1 = CRC_MULTIPLY(combine_cache[cache_num], crc1);
len2 >>= 1;
cache_num = (cache_num + 1) & 63;
/* if no more bits set, then done */
} while (len2 != 0);
/* return combined crc */
crc1 ^= crc2;
return crc1;
}
#undef CRC_MULTIPLY

10
src/crccombine.h Normal file
View File

@ -0,0 +1,10 @@
#include <stdint.h>
/* mask types */
typedef unsigned long long v2uq __attribute__ ((vector_size (16)));
uint64_t gf2_matrix_times_vec2(uint64_t *mat, uint64_t vec);
void init_combine_cache(uint64_t poly, uint8_t dim);
uint64_t crc64_combine(uint64_t crc1, uint64_t crc2, uintmax_t len2, uint64_t poly, uint8_t dim);

View File

@ -1,11 +1,21 @@
/*
* Copyright (C) 2013 Mark Adler
* Copyright (C) 2019-2024 Josiah Carlson
* Originally by: crc64.c Version 1.4 16 Dec 2013 Mark Adler
* Modifications by Matt Stancliff <matt@genges.com>:
* - removed CRC64-specific behavior
* - added generation of lookup tables by parameters
* - removed inversion of CRC input/result
* - removed automatic initialization in favor of explicit initialization
* Modifications by Josiah Carlson <josiah.carlson@gmail.com>
* - Added case/vector/AVX/+ versions of crc combine function; see crccombine.c
* - added optional static cache
* - Modified to use 1 thread to:
* - Partition large crc blobs into 2-3 segments
* - Process the 2-3 segments in parallel
* - Merge the resulting crcs
* -> Resulting in 10-90% performance boost for data > 1 meg
* - macro-ized to reduce copy/pasta
This software is provided 'as-is', without any express or implied
warranty. In no event will the author be held liable for any damages
@ -28,6 +38,10 @@
*/
#include "crcspeed.h"
#include "crccombine.h"
#define CRC64_LEN_MASK UINT64_C(0x7ffffffffffffff8)
#define CRC64_REVERSED_POLY UINT64_C(0x95ac9329ac4bc9b5)
/* Fill in a CRC constants table. */
void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) {
@ -39,7 +53,7 @@ void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) {
table[0][n] = crcfn(0, &v, 1);
}
/* generate nested CRC table for future slice-by-8 lookup */
/* generate nested CRC table for future slice-by-8/16/24+ lookup */
for (int n = 0; n < 256; n++) {
crc = table[0][n];
for (int k = 1; k < 8; k++) {
@ -47,6 +61,10 @@ void crcspeed64little_init(crcfn64 crcfn, uint64_t table[8][256]) {
table[k][n] = crc;
}
}
#if USE_STATIC_COMBINE_CACHE
/* initialize combine cache for CRC stapling for slice-by 16/24+ */
init_combine_cache(CRC64_REVERSED_POLY, 64);
#endif
}
void crcspeed16little_init(crcfn16 crcfn, uint16_t table[8][256]) {
@ -104,45 +122,151 @@ void crcspeed16big_init(crcfn16 fn, uint16_t big_table[8][256]) {
}
}
/* Note: doing all of our crc/next modifications *before* the crc table
* references is an absolute speedup on all CPUs tested. So... keep these
* macros separate.
*/
#define DO_8_1(crc, next) \
crc ^= *(uint64_t *)next; \
next += 8
#define DO_8_2(crc) \
crc = little_table[7][(uint8_t)crc] ^ \
little_table[6][(uint8_t)(crc >> 8)] ^ \
little_table[5][(uint8_t)(crc >> 16)] ^ \
little_table[4][(uint8_t)(crc >> 24)] ^ \
little_table[3][(uint8_t)(crc >> 32)] ^ \
little_table[2][(uint8_t)(crc >> 40)] ^ \
little_table[1][(uint8_t)(crc >> 48)] ^ \
little_table[0][crc >> 56]
#define CRC64_SPLIT(div) \
olen = len; \
next2 = next1 + ((len / div) & CRC64_LEN_MASK); \
len = (next2 - next1)
#define MERGE_CRC(crcn) \
crc1 = crc64_combine(crc1, crcn, next2 - next1, CRC64_REVERSED_POLY, 64)
#define MERGE_END(last, DIV) \
len = olen - ((next2 - next1) * DIV); \
next1 = last
/* Variables so we can change for benchmarking; these seem to be fairly
* reasonable for Intel CPUs made since 2010. Please adjust as necessary if
* or when your CPU has more load / execute units. We've written benchmark code
* to help you tune your platform, see crc64Test. */
#if defined(__i386__) || defined(__X86_64__)
static size_t CRC64_TRI_CUTOFF = (2*1024);
static size_t CRC64_DUAL_CUTOFF = (128);
#else
static size_t CRC64_TRI_CUTOFF = (16*1024);
static size_t CRC64_DUAL_CUTOFF = (1024);
#endif
void set_crc64_cutoffs(size_t dual_cutoff, size_t tri_cutoff) {
CRC64_DUAL_CUTOFF = dual_cutoff;
CRC64_TRI_CUTOFF = tri_cutoff;
}
/* Calculate a non-inverted CRC multiple bytes at a time on a little-endian
* architecture. If you need inverted CRC, invert *before* calling and invert
* *after* calling.
* 64 bit crc = process 8 bytes at once;
* 64 bit crc = process 8/16/24 bytes at once;
*/
uint64_t crcspeed64little(uint64_t little_table[8][256], uint64_t crc,
uint64_t crcspeed64little(uint64_t little_table[8][256], uint64_t crc1,
void *buf, size_t len) {
unsigned char *next = buf;
unsigned char *next1 = buf;
if (CRC64_DUAL_CUTOFF < 1) {
goto final;
}
/* process individual bytes until we reach an 8-byte aligned pointer */
while (len && ((uintptr_t)next & 7) != 0) {
crc = little_table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8);
while (len && ((uintptr_t)next1 & 7) != 0) {
crc1 = little_table[0][(crc1 ^ *next1++) & 0xff] ^ (crc1 >> 8);
len--;
}
/* fast middle processing, 8 bytes (aligned!) per loop */
if (len > CRC64_TRI_CUTOFF) {
/* 24 bytes per loop, doing 3 parallel 8 byte chunks at a time */
unsigned char *next2, *next3;
uint64_t olen, crc2=0, crc3=0;
CRC64_SPLIT(3);
/* len is now the length of the first segment, the 3rd segment possibly
* having extra bytes to clean up at the end
*/
next3 = next2 + len;
while (len >= 8) {
crc ^= *(uint64_t *)next;
crc = little_table[7][crc & 0xff] ^
little_table[6][(crc >> 8) & 0xff] ^
little_table[5][(crc >> 16) & 0xff] ^
little_table[4][(crc >> 24) & 0xff] ^
little_table[3][(crc >> 32) & 0xff] ^
little_table[2][(crc >> 40) & 0xff] ^
little_table[1][(crc >> 48) & 0xff] ^
little_table[0][crc >> 56];
next += 8;
len -= 8;
DO_8_1(crc1, next1);
DO_8_1(crc2, next2);
DO_8_1(crc3, next3);
DO_8_2(crc1);
DO_8_2(crc2);
DO_8_2(crc3);
}
/* merge the 3 crcs */
MERGE_CRC(crc2);
MERGE_CRC(crc3);
MERGE_END(next3, 3);
} else if (len > CRC64_DUAL_CUTOFF) {
/* 16 bytes per loop, doing 2 parallel 8 byte chunks at a time */
unsigned char *next2;
uint64_t olen, crc2=0;
CRC64_SPLIT(2);
/* len is now the length of the first segment, the 2nd segment possibly
* having extra bytes to clean up at the end
*/
while (len >= 8) {
len -= 8;
DO_8_1(crc1, next1);
DO_8_1(crc2, next2);
DO_8_2(crc1);
DO_8_2(crc2);
}
/* merge the 2 crcs */
MERGE_CRC(crc2);
MERGE_END(next2, 2);
}
/* We fall through here to handle our <CRC64_DUAL_CUTOFF inputs, and for any trailing
* bytes that wasn't evenly divisble by 16 or 24 above. */
/* fast processing, 8 bytes (aligned!) per loop */
while (len >= 8) {
len -= 8;
DO_8_1(crc1, next1);
DO_8_2(crc1);
}
final:
/* process remaining bytes (can't be larger than 8) */
while (len) {
crc = little_table[0][(crc ^ *next++) & 0xff] ^ (crc >> 8);
crc1 = little_table[0][(crc1 ^ *next1++) & 0xff] ^ (crc1 >> 8);
len--;
}
return crc;
return crc1;
}
/* clean up our namespace */
#undef DO_8_1
#undef DO_8_2
#undef CRC64_SPLIT
#undef MERGE_CRC
#undef MERGE_END
#undef CRC64_REVERSED_POLY
#undef CRC64_LEN_MASK
/* note: similar perf advantages can be had for long strings in crc16 using all
* of the same optimizations as above; though this is unnecessary. crc16 is
* normally used to shard keys; not hash / verify data, so is used on shorter
* data that doesn't warrant such changes. */
uint16_t crcspeed16little(uint16_t little_table[8][256], uint16_t crc,
void *buf, size_t len) {
unsigned char *next = buf;
@ -190,6 +314,10 @@ uint64_t crcspeed64big(uint64_t big_table[8][256], uint64_t crc, void *buf,
len--;
}
/* note: alignment + 2/3-way processing can probably be handled here nearly
the same as above, using our updated DO_8_2 macro. Not included in these
changes, as other authors, I don't have big-endian to test with. */
while (len >= 8) {
crc ^= *(uint64_t *)next;
crc = big_table[0][crc & 0xff] ^

View File

@ -34,6 +34,8 @@
typedef uint64_t (*crcfn64)(uint64_t, const void *, const uint64_t);
typedef uint16_t (*crcfn16)(uint16_t, const void *, const uint64_t);
void set_crc64_cutoffs(size_t dual_cutoff, size_t tri_cutoff);
/* CRC-64 */
void crcspeed64little_init(crcfn64 fn, uint64_t table[8][256]);
void crcspeed64big_init(crcfn64 fn, uint64_t table[8][256]);

View File

@ -21,6 +21,8 @@
* C-level DB API
*----------------------------------------------------------------------------*/
static_assert(MAX_KEYSIZES_TYPES == OBJ_TYPE_BASIC_MAX, "Must be equal");
/* Flags for expireIfNeeded */
#define EXPIRE_FORCE_DELETE_EXPIRED 1
#define EXPIRE_AVOID_DELETE_EXPIRED 2
@ -46,6 +48,48 @@ void updateLFU(robj *val) {
val->lru = (LFUGetTimeInMinutes()<<8) | counter;
}
/*
* Update histogram of keys-sizes
*
* It is used to track the distribution of key sizes in the dataset. It is updated
* every time key's length is modified. Available to user via INFO command.
*
* The histogram is a base-2 logarithmic histogram, with 64 bins. The i'th bin
* represents the number of keys with a size in the range 2^i and 2^(i+1)
* exclusive. oldLen/newLen must be smaller than 2^48, and if their value
* equals 0, it means that the key is being created/deleted, respectively. Each
* data type has its own histogram and it is per database (In addition, there is
* histogram per slot for future cluster use).
*
* Examples to LEN values and corresponding bins in histogram:
* [1,2)->0 [2,4)->1 [4,8)->2 [8,16)->3
*/
void updateKeysizesHist(redisDb *db, int didx, uint32_t type, uint64_t oldLen, uint64_t newLen) {
if(unlikely(type >= OBJ_TYPE_BASIC_MAX))
return;
kvstoreDictMetadata *dictMeta = kvstoreGetDictMetadata(db->keys, didx);
kvstoreMetadata *kvstoreMeta = kvstoreGetMetadata(db->keys);
if (oldLen != 0) {
int old_bin = log2ceil(oldLen);
debugServerAssertWithInfo(server.current_client, NULL, old_bin < MAX_KEYSIZES_BINS);
/* If following a key deletion it is last one in slot's dict, then
* slot's dict might get released as well. Verify if metadata is not NULL. */
if(dictMeta) dictMeta->keysizes_hist[type][old_bin]--;
kvstoreMeta->keysizes_hist[type][old_bin]--;
}
if (newLen != 0) {
int new_bin = log2ceil(newLen);
debugServerAssertWithInfo(server.current_client, NULL, new_bin < MAX_KEYSIZES_BINS);
/* If following a key deletion it is last one in slot's dict, then
* slot's dict might get released as well. Verify if metadata is not NULL. */
if(dictMeta) dictMeta->keysizes_hist[type][new_bin]++;
kvstoreMeta->keysizes_hist[type][new_bin]++;
}
}
/* Lookup a key for read or write operations, or return NULL if the key is not
* found in the specified DB. This function implements the functionality of
* lookupKeyRead(), lookupKeyWrite() and their ...WithFlags() variants.
@ -205,6 +249,7 @@ static dictEntry *dbAddInternal(redisDb *db, robj *key, robj *val, int update_if
kvstoreDictSetVal(db->keys, slot, de, val);
signalKeyAsReady(db, key, val->type);
notifyKeyspaceEvent(NOTIFY_NEW,"new",key,db->id);
updateKeysizesHist(db, slot, val->type, 0, getObjectLength(val)); /* add hist */
return de;
}
@ -250,6 +295,7 @@ int dbAddRDBLoad(redisDb *db, sds key, robj *val) {
int slot = getKeySlot(key);
dictEntry *de = kvstoreDictAddRaw(db->keys, slot, key, NULL);
if (de == NULL) return 0;
updateKeysizesHist(db, slot, val->type, 0, getObjectLength(val)); /* add hist */
initObjectLRUOrLFU(val);
kvstoreDictSetVal(db->keys, slot, de, val);
return 1;
@ -273,6 +319,9 @@ static void dbSetValue(redisDb *db, robj *key, robj *val, int overwrite, dictEnt
serverAssertWithInfo(NULL,key,de != NULL);
robj *old = dictGetVal(de);
/* Remove old key from keysizes histogram */
updateKeysizesHist(db, slot, old->type, getObjectLength(old), 0); /* remove hist */
val->lru = old->lru;
if (overwrite) {
@ -291,6 +340,9 @@ static void dbSetValue(redisDb *db, robj *key, robj *val, int overwrite, dictEnt
}
kvstoreDictSetVal(db->keys, slot, de, val);
/* Add new key to keysizes histogram */
updateKeysizesHist(db, slot, val->type, 0, getObjectLength(val));
/* if hash with HFEs, take care to remove from global HFE DS */
if (old->type == OBJ_HASH)
hashTypeRemoveFromExpires(&db->hexpires, old);
@ -404,6 +456,9 @@ int dbGenericDelete(redisDb *db, robj *key, int async, int flags) {
if (de) {
robj *val = dictGetVal(de);
/* remove key from histogram */
updateKeysizesHist(db, slot, val->type, getObjectLength(val), 0);
/* If hash object with expiry on fields, remove it from HFE DS of DB */
if (val->type == OBJ_HASH)
hashTypeRemoveFromExpires(&db->hexpires, val);
@ -599,7 +654,8 @@ redisDb *initTempDb(void) {
redisDb *tempDb = zcalloc(sizeof(redisDb)*server.dbnum);
for (int i=0; i<server.dbnum; i++) {
tempDb[i].id = i;
tempDb[i].keys = kvstoreCreate(&dbDictType, slot_count_bits, flags);
tempDb[i].keys = kvstoreCreate(&dbDictType, slot_count_bits,
flags | KVSTORE_ALLOC_META_KEYS_HIST);
tempDb[i].expires = kvstoreCreate(&dbExpiresDictType, slot_count_bits, flags);
tempDb[i].hexpires = ebCreate();
}
@ -1200,7 +1256,7 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) {
* The exception to the above is ZSET, where we do allocate temporary
* strings even when scanning a dict. */
if (o && (!ht || o->type == OBJ_ZSET)) {
listSetFreeMethod(keys, (void (*)(void*))sdsfree);
listSetFreeMethod(keys, sdsfreegeneric);
}
/* For main dictionary scan or data structure using hashtable. */

View File

@ -2,6 +2,9 @@
* Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved.
*
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
*
@ -483,6 +486,8 @@ void debugCommand(client *c) {
" In case RESET is provided the peak reset time will be restored to the default value",
"REPLYBUFFER RESIZING <0|1>",
" Enable or disable the reply buffer resize cron job",
"REPL-PAUSE <clear|after-fork|before-rdb-channel|on-streaming-repl-buf>",
" Pause the server's main process during various replication steps.",
"DICT-RESIZING <0|1>",
" Enable or disable the main dict and expire dict resizing.",
"SCRIPT <LIST|<sha>>",
@ -1018,6 +1023,20 @@ NULL
return;
}
addReply(c, shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr, "repl-pause") && c->argc == 3) {
if (!strcasecmp(c->argv[2]->ptr, "clear")) {
server.repl_debug_pause = REPL_DEBUG_PAUSE_NONE;
} else if (!strcasecmp(c->argv[2]->ptr,"after-fork")) {
server.repl_debug_pause |= REPL_DEBUG_AFTER_FORK;
} else if (!strcasecmp(c->argv[2]->ptr,"before-rdb-channel")) {
server.repl_debug_pause |= REPL_DEBUG_BEFORE_RDB_CHANNEL;
} else if (!strcasecmp(c->argv[2]->ptr, "on-streaming-repl-buf")) {
server.repl_debug_pause |= REPL_DEBUG_ON_STREAMING_REPL_BUF;
} else {
addReplySubcommandSyntaxError(c);
return;
}
addReply(c, shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr, "dict-resizing") && c->argc == 3) {
server.dict_resizing = atoi(c->argv[2]->ptr);
addReply(c, shared.ok);
@ -1052,6 +1071,46 @@ NULL
/* =========================== Crash handling ============================== */
/* When hide-user-data-from-log is enabled, to avoid leaking user info, we only
* print tokens of the current command into the log. First, we collect command
* tokens into this struct (Commands tokens are defined in json schema). Later,
* checking each argument against the token list. */
#define CMD_TOKEN_MAX_COUNT 128 /* Max token count in a command's json schema */
struct cmdToken {
const char *tokens[CMD_TOKEN_MAX_COUNT];
int n_token;
};
/* Collect tokens from command arguments recursively. */
static void cmdTokenCollect(struct cmdToken *tk, redisCommandArg *args, int argc) {
if (args == NULL)
return;
for (int i = 0; i < argc && tk->n_token < CMD_TOKEN_MAX_COUNT; i++) {
if (args[i].token)
tk->tokens[tk->n_token++] = args[i].token;
cmdTokenCollect(tk, args[i].subargs, args[i].num_args);
}
}
/* Get tokens of the command. */
static void cmdTokenGetFromCommand(struct cmdToken *tk, struct redisCommand *cmd) {
tk->n_token = 0;
cmdTokenCollect(tk, cmd->args, cmd->num_args);
}
/* Check if object is one of command's tokens. */
static int cmdTokenCheck(struct cmdToken *tk, robj *o) {
if (o->type != OBJ_STRING || !sdsEncodedObject(o))
return 0;
for (int i = 0; i < tk->n_token; i++) {
if (strcasecmp(tk->tokens[i], o->ptr) == 0)
return 1;
}
return 0;
}
__attribute__ ((noinline))
void _serverAssert(const char *estr, const char *file, int line) {
int new_report = bugReportStart();
@ -1072,28 +1131,35 @@ void _serverAssert(const char *estr, const char *file, int line) {
bugReportEnd(0, 0);
}
/* Returns the amount of client's command arguments we allow logging */
int clientArgsToLog(const client *c) {
return server.hide_user_data_from_log ? 1 : c->argc;
}
void _serverAssertPrintClientInfo(const client *c) {
int j;
char conninfo[CONN_INFO_LEN];
struct redisCommand *cmd = NULL;
struct cmdToken tokens = {{0}};
bugReportStart();
serverLog(LL_WARNING,"=== ASSERTION FAILED CLIENT CONTEXT ===");
serverLog(LL_WARNING,"client->flags = %llu", (unsigned long long) c->flags);
serverLog(LL_WARNING,"client->conn = %s", connGetInfo(c->conn, conninfo, sizeof(conninfo)));
serverLog(LL_WARNING,"client->argc = %d", c->argc);
for (j=0; j < c->argc; j++) {
if (j >= clientArgsToLog(c)) {
serverLog(LL_WARNING,"client->argv[%d] = *redacted*",j);
continue;
if (server.hide_user_data_from_log) {
cmd = lookupCommand(c->argv, c->argc);
if (cmd)
cmdTokenGetFromCommand(&tokens, cmd);
}
for (j=0; j < c->argc; j++) {
char buf[128];
char *arg;
/* Allow command name, subcommand name and command tokens in the log. */
if (server.hide_user_data_from_log && (j != 0 && !(j == 1 && cmd && cmd->parent))) {
if (!cmdTokenCheck(&tokens, c->argv[j])) {
serverLog(LL_WARNING, "client->argv[%d] = *redacted*", j);
continue;
}
}
if (c->argv[j]->type == OBJ_STRING && sdsEncodedObject(c->argv[j])) {
arg = (char*) c->argv[j]->ptr;
} else {
@ -2061,17 +2127,28 @@ void logCurrentClient(client *cc, const char *title) {
sds client;
int j;
struct redisCommand *cmd = NULL;
struct cmdToken tokens = {{0}};
serverLog(LL_WARNING|LL_RAW, "\n------ %s CLIENT INFO ------\n", title);
client = catClientInfoString(sdsempty(),cc);
serverLog(LL_WARNING|LL_RAW,"%s\n", client);
sdsfree(client);
serverLog(LL_WARNING|LL_RAW,"argc: '%d'\n", cc->argc);
if (server.hide_user_data_from_log) {
cmd = lookupCommand(cc->argv, cc->argc);
if (cmd)
cmdTokenGetFromCommand(&tokens, cmd);
}
for (j = 0; j < cc->argc; j++) {
if (j >= clientArgsToLog(cc)) {
serverLog(LL_WARNING|LL_RAW,"argv[%d]: *redacted*\n",j);
/* Allow command name, subcommand name and command tokens in the log. */
if (server.hide_user_data_from_log && (j != 0 && !(j == 1 && cmd && cmd->parent))) {
if (!cmdTokenCheck(&tokens, cc->argv[j])) {
serverLog(LL_WARNING|LL_RAW, "argv[%d]: '*redacted*'\n", j);
continue;
}
}
robj *decoded;
decoded = getDecodedObject(cc->argv[j]);
sds repr = sdscatrepr(sdsempty(),decoded->ptr, min(sdslen(decoded->ptr), 1024));
@ -2393,6 +2470,8 @@ void removeSigSegvHandlers(void) {
}
void printCrashReport(void) {
server.crashing = 1;
/* Log INFO and CLIENT LIST */
logServerInfo();
@ -2523,6 +2602,12 @@ void applyWatchdogPeriod(void) {
}
}
void debugPauseProcess(void) {
serverLog(LL_NOTICE, "Process is about to stop.");
raise(SIGSTOP);
serverLog(LL_NOTICE, "Process has been continued.");
}
/* Positive input is sleep time in microseconds. Negative input is fractions
* of microseconds, i.e. -10 means 100 nanoseconds. */
void debugDelay(int usec) {

View File

@ -296,7 +296,7 @@ void activeDefragHfieldDictCallback(void *privdata, const dictEntry *de) {
dictUseStoredKeyApi(d, 1);
uint64_t hash = dictGetHash(d, newhf);
dictUseStoredKeyApi(d, 0);
dictEntry *de = dictFindEntryByPtrAndHash(d, hf, hash);
dictEntry *de = dictFindByHashAndPtr(d, hf, hash);
serverAssert(de);
dictSetKey(d, de, newhf);
}
@ -729,8 +729,9 @@ void defragStream(redisDb *db, dictEntry *kde) {
void defragModule(redisDb *db, dictEntry *kde) {
robj *obj = dictGetVal(kde);
serverAssert(obj->type == OBJ_MODULE);
if (!moduleDefragValue(dictGetKey(kde), obj, db->id))
robj keyobj;
initStaticStringObject(keyobj, dictGetKey(kde));
if (!moduleDefragValue(&keyobj, obj, db->id))
defragLater(db, kde);
}
@ -752,7 +753,7 @@ void defragKey(defragCtx *ctx, dictEntry *de) {
* the pointer it holds, since it won't be able to do the string
* compare, but we can find the entry using key hash and pointer. */
uint64_t hash = kvstoreGetHash(db->expires, newsds);
dictEntry *expire_de = kvstoreDictFindEntryByPtrAndHash(db->expires, slot, keysds, hash);
dictEntry *expire_de = kvstoreDictFindByHashAndPtr(db->expires, slot, keysds, hash);
if (expire_de) kvstoreDictSetKey(db->expires, slot, expire_de, newsds);
}
@ -940,7 +941,9 @@ int defragLaterItem(dictEntry *de, unsigned long *cursor, long long endtime, int
} else if (ob->type == OBJ_STREAM) {
return scanLaterStreamListpacks(ob, cursor, endtime);
} else if (ob->type == OBJ_MODULE) {
return moduleLateDefrag(dictGetKey(de), ob, cursor, endtime, dbid);
robj keyobj;
initStaticStringObject(keyobj, dictGetKey(de));
return moduleLateDefrag(&keyobj, ob, cursor, endtime, dbid);
} else {
*cursor = 0; /* object type may have changed since we schedule it for later */
}

View File

@ -62,6 +62,7 @@ typedef struct {
static void _dictExpandIfNeeded(dict *d);
static void _dictShrinkIfNeeded(dict *d);
static void _dictRehashStepIfNeeded(dict *d, uint64_t visitedIdx);
static signed char _dictNextExp(unsigned long size);
static int _dictInit(dict *d, dictType *type);
static dictEntry *dictGetNext(const dictEntry *de);
@ -120,13 +121,15 @@ uint64_t dictGenCaseHashFunction(const unsigned char *buf, size_t len) {
* the bit pattern of the least 3 significant bits mark the kind of entry. */
#define ENTRY_PTR_MASK 7 /* 111 */
#define ENTRY_PTR_NORMAL 0 /* 000 */
#define ENTRY_PTR_NO_VALUE 2 /* 010 */
#define ENTRY_PTR_NORMAL 0 /* 000 : If a pointer to an entry with value. */
#define ENTRY_PTR_IS_ODD_KEY 1 /* XX1 : If a pointer to odd key address (must be 1). */
#define ENTRY_PTR_IS_EVEN_KEY 2 /* 010 : If a pointer to even key address. (must be 2 or 4). */
#define ENTRY_PTR_NO_VALUE 4 /* 100 : If a pointer to an entry without value. */
/* Returns 1 if the entry pointer is a pointer to a key, rather than to an
* allocated entry. Returns 0 otherwise. */
static inline int entryIsKey(const dictEntry *de) {
return (uintptr_t)(void *)de & 1;
return ((uintptr_t)de & (ENTRY_PTR_IS_ODD_KEY | ENTRY_PTR_IS_EVEN_KEY));
}
/* Returns 1 if the pointer is actually a pointer to a dictEntry struct. Returns
@ -155,7 +158,6 @@ static inline dictEntry *encodeMaskedPtr(const void *ptr, unsigned int bits) {
}
static inline void *decodeMaskedPtr(const dictEntry *de) {
assert(!entryIsKey(de));
return (void *)((uintptr_t)(void *)de & ~ENTRY_PTR_MASK);
}
@ -275,6 +277,12 @@ int _dictResize(dict *d, unsigned long size, int* malloc_failed)
return DICT_OK;
}
/* Force a full rehashing of the dictionary */
if (d->type->force_full_rehash) {
while (dictRehash(d, 1000)) {
/* Continue rehashing */
}
}
return DICT_OK;
}
@ -326,18 +334,17 @@ static void rehashEntriesInBucketAtIndex(dict *d, uint64_t idx) {
h = idx & DICTHT_SIZE_MASK(d->ht_size_exp[1]);
}
if (d->type->no_value) {
if (d->type->keys_are_odd && !d->ht_table[1][h]) {
/* Destination bucket is empty and we can store the key
* directly without an allocated entry. Free the old entry
* if it's an allocated entry.
*
* TODO: Add a flag 'keys_are_even' and if set, we can use
* this optimization for these dicts too. We can set the LSB
* bit when stored as a dict entry and clear it again when
* we need the key back. */
assert(entryIsKey(key));
if (!d->ht_table[1][h]) {
/* The destination bucket is empty, allowing the key to be stored
* directly without allocating a dictEntry. If an old entry was
* previously allocated, free its memory. */
if (!entryIsKey(de)) zfree(decodeMaskedPtr(de));
de = key;
if (d->type->keys_are_odd)
de = key; /* ENTRY_PTR_IS_ODD_KEY trivially set by the odd key. */
else
de = encodeMaskedPtr(key, ENTRY_PTR_IS_EVEN_KEY);
} else if (entryIsKey(de)) {
/* We don't have an allocated entry but we need one. */
de = createEntryNoValue(key, d->ht_table[1][h]);
@ -509,6 +516,39 @@ dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
return dictInsertAtPosition(d, key, position);
}
/* Low-level add function for non-existing keys:
* This function adds a new entry to the dictionary, assuming the key does not
* already exist.
* Parameters:
* - `dict *d`: Pointer to the dictionary structure.
* - `void *key`: Pointer to the key being added.
* - `const uint64_t hash`: hash of the key being added.
* Guarantees:
* - The key is assumed to be non-existing.
* Note:
* Ensure that the key's uniqueness is managed externally before calling this function. */
dictEntry *dictAddNonExistsByHash(dict *d, void *key, const uint64_t hash) {
/* Get the position for the new key, it should never be NULL. */
unsigned long idx, table;
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
/* Rehash the hash table if needed */
_dictRehashStepIfNeeded(d,idx);
/* Expand the hash table if needed */
_dictExpandIfNeeded(d);
table = dictIsRehashing(d) ? 1 : 0;
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
void *position = &d->ht_table[table][idx];
assert(position!=NULL);
/* Dup the key if necessary. */
if (d->type->keyDup) key = d->type->keyDup(d, key);
return dictInsertAtPosition(d, key, position);
}
/* Adds a key in the dict's hashtable at the position returned by a preceding
* call to dictFindPositionForInsert. This is a low level function which allows
* splitting dictAddRaw in two parts. Normally, dictAddRaw or dictAdd should be
@ -522,16 +562,17 @@ dictEntry *dictInsertAtPosition(dict *d, void *key, void *position) {
assert(bucket >= &d->ht_table[htidx][0] &&
bucket <= &d->ht_table[htidx][DICTHT_SIZE_MASK(d->ht_size_exp[htidx])]);
if (d->type->no_value) {
if (d->type->keys_are_odd && !*bucket) {
/* We can store the key directly in the destination bucket without the
* allocated entry.
*
* TODO: Add a flag 'keys_are_even' and if set, we can use this
* optimization for these dicts too. We can set the LSB bit when
* stored as a dict entry and clear it again when we need the key
* back. */
if (!*bucket) {
/* We can store the key directly in the destination bucket without
* allocating dictEntry.
*/
if (d->type->keys_are_odd) {
entry = key;
assert(entryIsKey(entry));
/* The flag ENTRY_PTR_IS_ODD_KEY (=0x1) is already aligned with LSB bit */
} else {
entry = encodeMaskedPtr(key, ENTRY_PTR_IS_EVEN_KEY);
}
} else {
/* Allocate an entry without value. */
entry = createEntryNoValue(key, *bucket);
@ -608,17 +649,8 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
h = dictHashKey(d, key, d->useStoredKeyApi);
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
if (dictIsRehashing(d)) {
if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) {
/* If we have a valid hash entry at `idx` in ht0, we perform
* rehash on the bucket at `idx` (being more CPU cache friendly) */
_dictBucketRehash(d, idx);
} else {
/* If the hash entry is not in ht0, we rehash the buckets based
* on the rehashidx (not CPU cache friendly). */
_dictRehashStep(d);
}
}
/* Rehash the hash table if needed */
_dictRehashStepIfNeeded(d,idx);
keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d);
@ -697,8 +729,9 @@ int _dictClear(dict *d, int htidx, void(callback)(dict*)) {
/* Free all the elements */
for (i = 0; i < DICTHT_SIZE(d->ht_size_exp[htidx]) && d->ht_used[htidx] > 0; i++) {
dictEntry *he, *nextHe;
if (callback && (i & 65535) == 0) callback(d);
/* Callback will be called once for every 65535 deletions. Beware,
* if dict has less than 65535 items, it will not be called at all.*/
if (callback && i != 0 && (i & 65535) == 0) callback(d);
if ((he = d->ht_table[htidx][i]) == NULL) continue;
while(he) {
@ -733,44 +766,49 @@ void dictRelease(dict *d)
zfree(d);
}
dictEntry *dictFind(dict *d, const void *key)
{
dictEntry *dictFindByHash(dict *d, const void *key, const uint64_t hash) {
dictEntry *he;
uint64_t h, idx, table;
uint64_t idx, table;
if (dictSize(d) == 0) return NULL; /* dict is empty */
h = dictHashKey(d, key, d->useStoredKeyApi);
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d);
if (dictIsRehashing(d)) {
if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) {
/* If we have a valid hash entry at `idx` in ht0, we perform
* rehash on the bucket at `idx` (being more CPU cache friendly) */
_dictBucketRehash(d, idx);
} else {
/* If the hash entry is not in ht0, we rehash the buckets based
* on the rehashidx (not CPU cache friendly). */
_dictRehashStep(d);
}
}
/* Rehash the hash table if needed */
_dictRehashStepIfNeeded(d,idx);
for (table = 0; table <= 1; table++) {
if (table == 0 && (long)idx < d->rehashidx) continue;
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
/* Prefetch the bucket at the calculated index */
redis_prefetch_read(&d->ht_table[table][idx]);
he = d->ht_table[table][idx];
while(he) {
void *he_key = dictGetKey(he);
/* Prefetch the next entry to improve cache efficiency */
redis_prefetch_read(dictGetNext(he));
if (key == he_key || cmpFunc(d, key, he_key))
return he;
he = dictGetNext(he);
}
if (!dictIsRehashing(d)) return NULL;
/* Use unlikely to optimize branch prediction for the common case */
if (unlikely(!dictIsRehashing(d))) return NULL;
}
return NULL;
}
dictEntry *dictFind(dict *d, const void *key)
{
if (dictSize(d) == 0) return NULL; /* dict is empty */
const uint64_t hash = dictHashKey(d, key, d->useStoredKeyApi);
return dictFindByHash(d,key,hash);
}
void *dictFetchValue(dict *d, const void *key) {
dictEntry *he;
@ -877,7 +915,10 @@ double dictIncrDoubleVal(dictEntry *de, double val) {
}
void *dictGetKey(const dictEntry *de) {
if (entryIsKey(de)) return (void*)de;
/* if entryIsKey() */
if ((uintptr_t)de & ENTRY_PTR_IS_ODD_KEY) return (void *) de;
if ((uintptr_t)de & ENTRY_PTR_IS_EVEN_KEY) return decodeMaskedPtr(de);
/* Regular entry */
if (entryIsNoValue(de)) return decodeEntryNoValue(de)->key;
return de->key;
}
@ -1556,6 +1597,21 @@ static void _dictShrinkIfNeeded(dict *d)
dictShrinkIfNeeded(d);
}
static void _dictRehashStepIfNeeded(dict *d, uint64_t visitedIdx) {
if ((!dictIsRehashing(d)) || (d->pauserehash != 0))
return;
/* rehashing not in progress if rehashidx == -1 */
if ((long)visitedIdx >= d->rehashidx && d->ht_table[0][visitedIdx]) {
/* If we have a valid hash entry at `idx` in ht0, we perform
* rehash on the bucket at `idx` (being more CPU cache friendly) */
_dictBucketRehash(d, visitedIdx);
} else {
/* If the hash entry is not in ht0, we rehash the buckets based
* on the rehashidx (not CPU cache friendly). */
dictRehash(d,1);
}
}
/* Our hash table capability is a power of two */
static signed char _dictNextExp(unsigned long size)
{
@ -1576,17 +1632,8 @@ void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing)
if (existing) *existing = NULL;
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
if (dictIsRehashing(d)) {
if ((long)idx >= d->rehashidx && d->ht_table[0][idx]) {
/* If we have a valid hash entry at `idx` in ht0, we perform
* rehash on the bucket at `idx` (being more CPU cache friendly) */
_dictBucketRehash(d, idx);
} else {
/* If the hash entry is not in ht0, we rehash the buckets based
* on the rehashidx (not CPU cache friendly). */
_dictRehashStep(d);
}
}
/* Rehash the hash table if needed */
_dictRehashStepIfNeeded(d,idx);
/* Expand the hash table if needed */
_dictExpandIfNeeded(d);
@ -1614,6 +1661,7 @@ void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing)
return bucket;
}
void dictEmpty(dict *d, void(callback)(dict*)) {
/* Someone may be monitoring a dict that started rehashing, before
* destroying the dict fake completion. */
@ -1639,7 +1687,7 @@ uint64_t dictGetHash(dict *d, const void *key) {
* the hash value should be provided using dictGetHash.
* no string / key comparison is performed.
* return value is a pointer to the dictEntry if found, or NULL if not found. */
dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash) {
dictEntry *dictFindByHashAndPtr(dict *d, const void *oldptr, const uint64_t hash) {
dictEntry *he;
unsigned long idx, table;
@ -1821,6 +1869,32 @@ char *stringFromLongLong(long long value) {
return s;
}
char *stringFromSubstring(void) {
#define LARGE_STRING_SIZE 10000
#define MIN_STRING_SIZE 100
#define MAX_STRING_SIZE 500
static char largeString[LARGE_STRING_SIZE + 1];
static int init = 0;
if (init == 0) {
/* Generate a large string */
for (size_t i = 0; i < LARGE_STRING_SIZE; i++) {
/* Random printable ASCII character (33 to 126) */
largeString[i] = 33 + (rand() % 94);
}
/* Null-terminate the large string */
largeString[LARGE_STRING_SIZE] = '\0';
init = 1;
}
/* Randomly choose a size between minSize and maxSize */
size_t substringSize = MIN_STRING_SIZE + (rand() % (MAX_STRING_SIZE - MIN_STRING_SIZE + 1));
size_t startIndex = rand() % (LARGE_STRING_SIZE - substringSize + 1);
/* Allocate memory for the substring (+1 for null terminator) */
char *s = zmalloc(substringSize + 1);
memcpy(s, largeString + startIndex, substringSize); // Copy the substring
s[substringSize] = '\0'; // Null-terminate the string
return s;
}
dictType BenchmarkDictType = {
hashCallback,
NULL,
@ -1842,7 +1916,9 @@ int dictTest(int argc, char **argv, int flags) {
long j;
long long start, elapsed;
int retval;
dict *dict = dictCreate(&BenchmarkDictType);
dict *d = dictCreate(&BenchmarkDictType);
dictEntry* de = NULL;
dictEntry* existing = NULL;
long count = 0;
unsigned long new_dict_size, current_dict_used, remain_keys;
int accurate = (flags & REDIS_TEST_ACCURATE);
@ -1860,12 +1936,12 @@ int dictTest(int argc, char **argv, int flags) {
TEST("Add 16 keys and verify dict resize is ok") {
dictSetResizeEnabled(DICT_RESIZE_ENABLE);
for (j = 0; j < 16; j++) {
retval = dictAdd(dict,stringFromLongLong(j),(void*)j);
retval = dictAdd(d,stringFromLongLong(j),(void*)j);
assert(retval == DICT_OK);
}
while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000);
assert(dictSize(dict) == 16);
assert(dictBuckets(dict) == 16);
while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000);
assert(dictSize(d) == 16);
assert(dictBuckets(d) == 16);
}
TEST("Use DICT_RESIZE_AVOID to disable the dict resize and pad to (dict_force_resize_ratio * 16)") {
@ -1874,132 +1950,218 @@ int dictTest(int argc, char **argv, int flags) {
* dict_force_resize_ratio in next test. */
dictSetResizeEnabled(DICT_RESIZE_AVOID);
for (j = 16; j < (long)dict_force_resize_ratio * 16; j++) {
retval = dictAdd(dict,stringFromLongLong(j),(void*)j);
retval = dictAdd(d,stringFromLongLong(j),(void*)j);
assert(retval == DICT_OK);
}
current_dict_used = dict_force_resize_ratio * 16;
assert(dictSize(dict) == current_dict_used);
assert(dictBuckets(dict) == 16);
assert(dictSize(d) == current_dict_used);
assert(dictBuckets(d) == 16);
}
TEST("Add one more key, trigger the dict resize") {
retval = dictAdd(dict,stringFromLongLong(current_dict_used),(void*)(current_dict_used));
retval = dictAdd(d,stringFromLongLong(current_dict_used),(void*)(current_dict_used));
assert(retval == DICT_OK);
current_dict_used++;
new_dict_size = 1UL << _dictNextExp(current_dict_used);
assert(dictSize(dict) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == 16);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == new_dict_size);
assert(dictSize(d) == current_dict_used);
assert(DICTHT_SIZE(d->ht_size_exp[0]) == 16);
assert(DICTHT_SIZE(d->ht_size_exp[1]) == new_dict_size);
/* Wait for rehashing. */
dictSetResizeEnabled(DICT_RESIZE_ENABLE);
while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000);
assert(dictSize(dict) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0);
while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000);
assert(dictSize(d) == current_dict_used);
assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0);
}
TEST("Delete keys until we can trigger shrink in next test") {
/* Delete keys until we can satisfy (1 / HASHTABLE_MIN_FILL) in the next test. */
for (j = new_dict_size / HASHTABLE_MIN_FILL + 1; j < (long)current_dict_used; j++) {
char *key = stringFromLongLong(j);
retval = dictDelete(dict, key);
retval = dictDelete(d, key);
zfree(key);
assert(retval == DICT_OK);
}
current_dict_used = new_dict_size / HASHTABLE_MIN_FILL + 1;
assert(dictSize(dict) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0);
assert(dictSize(d) == current_dict_used);
assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0);
}
TEST("Delete one more key, trigger the dict resize") {
current_dict_used--;
char *key = stringFromLongLong(current_dict_used);
retval = dictDelete(dict, key);
retval = dictDelete(d, key);
zfree(key);
unsigned long oldDictSize = new_dict_size;
new_dict_size = 1UL << _dictNextExp(current_dict_used);
assert(retval == DICT_OK);
assert(dictSize(dict) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == oldDictSize);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == new_dict_size);
assert(dictSize(d) == current_dict_used);
assert(DICTHT_SIZE(d->ht_size_exp[0]) == oldDictSize);
assert(DICTHT_SIZE(d->ht_size_exp[1]) == new_dict_size);
/* Wait for rehashing. */
while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000);
assert(dictSize(dict) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0);
while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000);
assert(dictSize(d) == current_dict_used);
assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0);
}
TEST("Empty the dictionary and add 128 keys") {
dictEmpty(dict, NULL);
dictEmpty(d, NULL);
for (j = 0; j < 128; j++) {
retval = dictAdd(dict,stringFromLongLong(j),(void*)j);
retval = dictAdd(d,stringFromLongLong(j),(void*)j);
assert(retval == DICT_OK);
}
while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000);
assert(dictSize(dict) == 128);
assert(dictBuckets(dict) == 128);
while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000);
assert(dictSize(d) == 128);
assert(dictBuckets(d) == 128);
}
TEST("Use DICT_RESIZE_AVOID to disable the dict resize and reduce to 3") {
/* Use DICT_RESIZE_AVOID to disable the dict reset, and reduce
* the number of keys until we can trigger shrinking in next test. */
dictSetResizeEnabled(DICT_RESIZE_AVOID);
remain_keys = DICTHT_SIZE(dict->ht_size_exp[0]) / (HASHTABLE_MIN_FILL * dict_force_resize_ratio) + 1;
remain_keys = DICTHT_SIZE(d->ht_size_exp[0]) / (HASHTABLE_MIN_FILL * dict_force_resize_ratio) + 1;
for (j = remain_keys; j < 128; j++) {
char *key = stringFromLongLong(j);
retval = dictDelete(dict, key);
retval = dictDelete(d, key);
zfree(key);
assert(retval == DICT_OK);
}
current_dict_used = remain_keys;
assert(dictSize(dict) == remain_keys);
assert(dictBuckets(dict) == 128);
assert(dictSize(d) == remain_keys);
assert(dictBuckets(d) == 128);
}
TEST("Delete one more key, trigger the dict resize") {
current_dict_used--;
char *key = stringFromLongLong(current_dict_used);
retval = dictDelete(dict, key);
retval = dictDelete(d, key);
zfree(key);
new_dict_size = 1UL << _dictNextExp(current_dict_used);
assert(retval == DICT_OK);
assert(dictSize(dict) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == 128);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == new_dict_size);
assert(dictSize(d) == current_dict_used);
assert(DICTHT_SIZE(d->ht_size_exp[0]) == 128);
assert(DICTHT_SIZE(d->ht_size_exp[1]) == new_dict_size);
/* Wait for rehashing. */
dictSetResizeEnabled(DICT_RESIZE_ENABLE);
while (dictIsRehashing(dict)) dictRehashMicroseconds(dict,1000);
assert(dictSize(dict) == current_dict_used);
assert(DICTHT_SIZE(dict->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(dict->ht_size_exp[1]) == 0);
while (dictIsRehashing(d)) dictRehashMicroseconds(d,1000);
assert(dictSize(d) == current_dict_used);
assert(DICTHT_SIZE(d->ht_size_exp[0]) == new_dict_size);
assert(DICTHT_SIZE(d->ht_size_exp[1]) == 0);
}
TEST("Restore to original state") {
dictEmpty(dict, NULL);
dictEmpty(d, NULL);
dictSetResizeEnabled(DICT_RESIZE_ENABLE);
}
srand(12345);
start_benchmark();
for (j = 0; j < count; j++) {
/* Create a dynamically allocated substring */
char *key = stringFromSubstring();
/* Insert the range directly from the large string */
de = dictAddRaw(d, key, &existing);
assert(de != NULL || existing != NULL);
/* If key already exists NULL is returned so we need to free the temp key string */
if (de == NULL) zfree(key);
}
end_benchmark("Inserting random substrings (100-500B) from large string with symbols");
assert((long)dictSize(d) <= count);
dictEmpty(d, NULL);
start_benchmark();
for (j = 0; j < count; j++) {
retval = dictAdd(dict,stringFromLongLong(j),(void*)j);
retval = dictAdd(d,stringFromLongLong(j),(void*)j);
assert(retval == DICT_OK);
}
end_benchmark("Inserting");
assert((long)dictSize(dict) == count);
end_benchmark("Inserting via dictAdd() non existing");
assert((long)dictSize(d) == count);
dictEmpty(d, NULL);
start_benchmark();
for (j = 0; j < count; j++) {
de = dictAddRaw(d,stringFromLongLong(j),NULL);
assert(de != NULL);
}
end_benchmark("Inserting via dictAddRaw() non existing");
assert((long)dictSize(d) == count);
start_benchmark();
for (j = 0; j < count; j++) {
void *key = stringFromLongLong(j);
de = dictAddRaw(d,key,&existing);
assert(existing != NULL);
zfree(key);
}
end_benchmark("Inserting via dictAddRaw() existing (no insertion)");
assert((long)dictSize(d) == count);
dictEmpty(d, NULL);
start_benchmark();
for (j = 0; j < count; j++) {
void *key = stringFromLongLong(j);
const uint64_t hash = dictGetHash(d, key);
de = dictAddNonExistsByHash(d,key,hash);
assert(de != NULL);
}
end_benchmark("Inserting via dictAddNonExistsByHash() non existing");
assert((long)dictSize(d) == count);
/* Wait for rehashing. */
while (dictIsRehashing(dict)) {
dictRehashMicroseconds(dict,100*1000);
while (dictIsRehashing(d)) {
dictRehashMicroseconds(d,100*1000);
}
dictEmpty(d, NULL);
start_benchmark();
for (j = 0; j < count; j++) {
/* Create a key */
void *key = stringFromLongLong(j);
/* Check if the key exists */
dictEntry *entry = dictFind(d, key);
assert(entry == NULL);
/* Add the key */
dictEntry *de = dictAddRaw(d, key, NULL);
assert(de != NULL);
}
end_benchmark("Find() and inserting via dictFind()+dictAddRaw() non existing");
dictEmpty(d, NULL);
start_benchmark();
for (j = 0; j < count; j++) {
/* Create a key */
void *key = stringFromLongLong(j);
uint64_t hash = dictGetHash(d, key);
/* Check if the key exists */
dictEntry *entry = dictFindByHash(d, key, hash);
assert(entry == NULL);
de = dictAddNonExistsByHash(d, key, hash);
assert(de != NULL);
}
end_benchmark("Find() and inserting via dictGetHash()+dictFindByHash()+dictAddNonExistsByHash() non existing");
assert((long)dictSize(d) == count);
/* Wait for rehashing. */
while (dictIsRehashing(d)) {
dictRehashMicroseconds(d,100*1000);
}
start_benchmark();
for (j = 0; j < count; j++) {
char *key = stringFromLongLong(j);
dictEntry *de = dictFind(dict,key);
dictEntry *de = dictFind(d,key);
assert(de != NULL);
zfree(key);
}
@ -2008,7 +2170,7 @@ int dictTest(int argc, char **argv, int flags) {
start_benchmark();
for (j = 0; j < count; j++) {
char *key = stringFromLongLong(j);
dictEntry *de = dictFind(dict,key);
dictEntry *de = dictFind(d,key);
assert(de != NULL);
zfree(key);
}
@ -2017,7 +2179,7 @@ int dictTest(int argc, char **argv, int flags) {
start_benchmark();
for (j = 0; j < count; j++) {
char *key = stringFromLongLong(rand() % count);
dictEntry *de = dictFind(dict,key);
dictEntry *de = dictFind(d,key);
assert(de != NULL);
zfree(key);
}
@ -2025,7 +2187,7 @@ int dictTest(int argc, char **argv, int flags) {
start_benchmark();
for (j = 0; j < count; j++) {
dictEntry *de = dictGetRandomKey(dict);
dictEntry *de = dictGetRandomKey(d);
assert(de != NULL);
}
end_benchmark("Accessing random keys");
@ -2034,7 +2196,7 @@ int dictTest(int argc, char **argv, int flags) {
for (j = 0; j < count; j++) {
char *key = stringFromLongLong(rand() % count);
key[0] = 'X';
dictEntry *de = dictFind(dict,key);
dictEntry *de = dictFind(d,key);
assert(de == NULL);
zfree(key);
}
@ -2043,14 +2205,52 @@ int dictTest(int argc, char **argv, int flags) {
start_benchmark();
for (j = 0; j < count; j++) {
char *key = stringFromLongLong(j);
retval = dictDelete(dict,key);
retval = dictDelete(d,key);
assert(retval == DICT_OK);
key[0] += 17; /* Change first number to letter. */
retval = dictAdd(dict,key,(void*)j);
retval = dictAdd(d,key,(void*)j);
assert(retval == DICT_OK);
}
end_benchmark("Removing and adding");
dictRelease(dict);
dictRelease(d);
TEST("Use dict without values (no_value=1)") {
dictType dt = BenchmarkDictType;
dt.no_value = 1;
/* Allocate array of size count and fill it with keys (stringFromLongLong(j) */
char **lookupKeys = zmalloc(sizeof(char*) * count);
for (long j = 0; j < count; j++)
lookupKeys[j] = stringFromLongLong(j);
/* Add keys without values. */
dict *d = dictCreate(&dt);
for (j = 0; j < count; j++) {
retval = dictAdd(d,lookupKeys[j],NULL);
assert(retval == DICT_OK);
}
/* Now, we should be able to find the keys. */
for (j = 0; j < count; j++) {
dictEntry *de = dictFind(d,lookupKeys[j]);
assert(de != NULL);
}
/* Find non exists keys. */
for (j = 0; j < count; j++) {
/* Temporarily override first char of key */
char tmp = lookupKeys[j][0];
lookupKeys[j][0] = 'X';
dictEntry *de = dictFind(d,lookupKeys[j]);
lookupKeys[j][0] = tmp;
assert(de == NULL);
}
dictRelease(d);
zfree(lookupKeys);
}
return 0;
}
#endif

View File

@ -53,15 +53,19 @@ typedef struct dictType {
/* Flags */
/* The 'no_value' flag, if set, indicates that values are not used, i.e. the
* dict is a set. When this flag is set, it's not possible to access the
* value of a dictEntry and it's also impossible to use dictSetKey(). Entry
* metadata can also not be used. */
* value of a dictEntry and it's also impossible to use dictSetKey(). It
* enables an optimization to store a key directly without an allocating
* dictEntry in between, if it is the only key in the bucket. */
unsigned int no_value:1;
/* If no_value = 1 and all keys are odd (LSB=1), setting keys_are_odd = 1
* enables one more optimization: to store a key without an allocated
* dictEntry. */
/* This flag is required for `no_value` optimization since the optimization
* reuses LSB bits as metadata */
unsigned int keys_are_odd:1;
/* TODO: Add a 'keys_are_even' flag and use a similar optimization if that
* flag is set. */
/* Ensures that the entire hash table is rehashed at once if set. */
unsigned int force_full_rehash:1;
/* Sometimes we want the ability to store a key in a given way inside the hash
* function, and lookup it in some other way without resorting to any kind of
* conversion. For instance the key may be stored as a structure also
@ -196,6 +200,7 @@ int dictTryExpand(dict *d, unsigned long size);
int dictShrink(dict *d, unsigned long size);
int dictAdd(dict *d, void *key, void *val);
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing);
dictEntry *dictAddNonExistsByHash(dict *d, void *key, const uint64_t hash);
void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing);
dictEntry *dictInsertAtPosition(dict *d, void *key, void *position);
dictEntry *dictAddOrFind(dict *d, void *key);
@ -207,6 +212,8 @@ dictEntry *dictTwoPhaseUnlinkFind(dict *d, const void *key, dictEntry ***plink,
void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table_index);
void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key);
dictEntry *dictFindByHash(dict *d, const void *key, const uint64_t hash);
dictEntry *dictFindByHashAndPtr(dict *d, const void *oldptr, const uint64_t hash);
void *dictFetchValue(dict *d, const void *key);
int dictShrinkIfNeeded(dict *d);
int dictExpandIfNeeded(dict *d);
@ -249,7 +256,6 @@ uint8_t *dictGetHashFunctionSeed(void);
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata);
unsigned long dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata);
uint64_t dictGetHash(dict *d, const void *key);
dictEntry *dictFindEntryByPtrAndHash(dict *d, const void *oldptr, uint64_t hash);
void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size);
size_t dictGetStatsMsg(char *buf, size_t bufsize, dictStats *stats, int full);

View File

@ -93,7 +93,7 @@ struct ldbState {
* bodies in order to obtain the Lua function name, and in the implementation
* of redis.sha1().
*
* 'digest' should point to a 41 bytes buffer: 40 for SHA1 converted into an
* 'digest' should point to a 41 bytes buffer: 40 for SHA1 converted into a
* hexadecimal number, plus 1 byte for null term. */
void sha1hex(char *digest, char *script, size_t len) {
SHA1_CTX ctx;
@ -259,12 +259,17 @@ void scriptingInit(int setup) {
void freeLuaScriptsSync(dict *lua_scripts, list *lua_scripts_lru_list, lua_State *lua) {
dictRelease(lua_scripts);
listRelease(lua_scripts_lru_list);
lua_close(lua);
#if defined(USE_JEMALLOC)
/* When lua is closed, destroy the previously used private tcache. */
void *ud = (global_State*)G(lua)->ud;
unsigned int lua_tcache = (unsigned int)(uintptr_t)ud;
#endif
lua_gc(lua, LUA_GCCOLLECT, 0);
lua_close(lua);
#if defined(USE_JEMALLOC)
je_mallctl("tcache.destroy", NULL, NULL, (void *)&lua_tcache, sizeof(unsigned int));
#endif
}
@ -730,7 +735,7 @@ NULL
}
}
unsigned long evalMemory(void) {
unsigned long evalScriptsMemoryVM(void) {
return luaMemory(lctx.lua);
}
@ -738,7 +743,7 @@ dict* evalScriptsDict(void) {
return lctx.lua_scripts;
}
unsigned long evalScriptsMemory(void) {
unsigned long evalScriptsMemoryEngine(void) {
return lctx.lua_scripts_mem +
dictMemUsage(lctx.lua_scripts) +
dictSize(lctx.lua_scripts) * sizeof(luaScript) +
@ -754,7 +759,7 @@ void ldbInit(void) {
ldb.conn = NULL;
ldb.active = 0;
ldb.logs = listCreate();
listSetFreeMethod(ldb.logs,(void (*)(void*))sdsfree);
listSetFreeMethod(ldb.logs, sdsfreegeneric);
ldb.children = listCreate();
ldb.src = NULL;
ldb.lines = 0;

97
src/eventnotifier.c Normal file
View File

@ -0,0 +1,97 @@
/* eventnotifier.c -- An event notifier based on eventfd or pipe.
*
* Copyright (c) 2024-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
*/
#include "eventnotifier.h"
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#ifdef HAVE_EVENT_FD
#include <sys/eventfd.h>
#endif
#include "anet.h"
#include "zmalloc.h"
eventNotifier* createEventNotifier(void) {
eventNotifier *en = zmalloc(sizeof(eventNotifier));
if (!en) return NULL;
#ifdef HAVE_EVENT_FD
if ((en->efd = eventfd(0, EFD_NONBLOCK| EFD_CLOEXEC)) != -1) {
return en;
}
#else
if (anetPipe(en->pipefd, O_CLOEXEC|O_NONBLOCK, O_CLOEXEC|O_NONBLOCK) != -1) {
return en;
}
#endif
/* Clean up if error. */
zfree(en);
return NULL;
}
int getReadEventFd(struct eventNotifier *en) {
#ifdef HAVE_EVENT_FD
return en->efd;
#else
return en->pipefd[0];
#endif
}
int getWriteEventFd(struct eventNotifier *en) {
#ifdef HAVE_EVENT_FD
return en->efd;
#else
return en->pipefd[1];
#endif
}
int triggerEventNotifier(struct eventNotifier *en) {
#ifdef HAVE_EVENT_FD
uint64_t u = 1;
if (write(en->efd, &u, sizeof(uint64_t)) == -1) {
return EN_ERR;
}
#else
char buf[1] = {'R'};
if (write(en->pipefd[1], buf, 1) == -1) {
return EN_ERR;
}
#endif
return EN_OK;
}
int handleEventNotifier(struct eventNotifier *en) {
#ifdef HAVE_EVENT_FD
uint64_t u;
if (read(en->efd, &u, sizeof(uint64_t)) == -1) {
return EN_ERR;
}
#else
char buf[1];
if (read(en->pipefd[0], buf, 1) == -1) {
return EN_ERR;
}
#endif
return EN_OK;
}
void freeEventNotifier(struct eventNotifier *en) {
#ifdef HAVE_EVENT_FD
close(en->efd);
#else
close(en->pipefd[0]);
close(en->pipefd[1]);
#endif
/* Free memory */
zfree(en);
}

33
src/eventnotifier.h Normal file
View File

@ -0,0 +1,33 @@
/* eventnotifier.h -- An event notifier based on eventfd or pipe.
*
* Copyright (c) 2024-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
*/
#ifndef EVENTNOTIFIER_H
#define EVENTNOTIFIER_H
#include "config.h"
#define EN_OK 0
#define EN_ERR -1
typedef struct eventNotifier {
#ifdef HAVE_EVENT_FD
int efd;
#else
int pipefd[2];
#endif
} eventNotifier;
eventNotifier* createEventNotifier(void);
int getReadEventFd(struct eventNotifier *en);
int getWriteEventFd(struct eventNotifier *en);
int triggerEventNotifier(struct eventNotifier *en);
int handleEventNotifier(struct eventNotifier *en);
void freeEventNotifier(struct eventNotifier *en);
#endif

View File

@ -23,6 +23,9 @@
#include <lua.h>
#include <lauxlib.h>
#include <lualib.h>
#if defined(USE_JEMALLOC)
#include <lstate.h>
#endif
#define LUA_ENGINE_NAME "LUA"
#define REGISTRY_ENGINE_CTX_NAME "__ENGINE_CTX__"
@ -189,8 +192,19 @@ static void luaEngineFreeFunction(void *engine_ctx, void *compiled_function) {
static void luaEngineFreeCtx(void *engine_ctx) {
luaEngineCtx *lua_engine_ctx = engine_ctx;
#if defined(USE_JEMALLOC)
/* When lua is closed, destroy the previously used private tcache. */
void *ud = (global_State*)G(lua_engine_ctx->lua)->ud;
unsigned int lua_tcache = (unsigned int)(uintptr_t)ud;
#endif
lua_gc(lua_engine_ctx->lua, LUA_GCCOLLECT, 0);
lua_close(lua_engine_ctx->lua);
zfree(lua_engine_ctx);
#if defined(USE_JEMALLOC)
je_mallctl("tcache.destroy", NULL, NULL, (void *)&lua_tcache, sizeof(unsigned int));
#endif
}
static void luaRegisterFunctionArgsInitialize(registerFunctionArgs *register_f_args,

View File

@ -144,6 +144,10 @@ static void engineLibraryFree(functionLibInfo* li) {
zfree(li);
}
static void engineLibraryFreeGeneric(void *li) {
engineLibraryFree((functionLibInfo *)li);
}
static void engineLibraryDispose(dict *d, void *obj) {
UNUSED(d);
engineLibraryFree(obj);
@ -338,7 +342,7 @@ static int libraryJoin(functionsLibCtx *functions_lib_ctx_dst, functionsLibCtx *
} else {
if (!old_libraries_list) {
old_libraries_list = listCreate();
listSetFreeMethod(old_libraries_list, (void (*)(void*))engineLibraryFree);
listSetFreeMethod(old_libraries_list, engineLibraryFreeGeneric);
}
libraryUnlink(functions_lib_ctx_dst, old_li);
listAddNodeTail(old_libraries_list, old_li);
@ -1063,7 +1067,7 @@ void functionLoadCommand(client *c) {
}
/* Return memory usage of all the engines combine */
unsigned long functionsMemory(void) {
unsigned long functionsMemoryVM(void) {
dictIterator *iter = dictGetIterator(engines);
dictEntry *entry = NULL;
size_t engines_memory = 0;
@ -1078,7 +1082,7 @@ unsigned long functionsMemory(void) {
}
/* Return memory overhead of all the engines combine */
unsigned long functionsMemoryOverhead(void) {
unsigned long functionsMemoryEngine(void) {
size_t memory_overhead = dictMemUsage(engines);
memory_overhead += dictMemUsage(curr_functions_lib_ctx->functions);
memory_overhead += sizeof(functionsLibCtx);

View File

@ -102,8 +102,8 @@ struct functionLibInfo {
int functionsRegisterEngine(const char *engine_name, engine *engine_ctx);
sds functionsCreateWithLibraryCtx(sds code, int replace, sds* err, functionsLibCtx *lib_ctx, size_t timeout);
unsigned long functionsMemory(void);
unsigned long functionsMemoryOverhead(void);
unsigned long functionsMemoryVM(void);
unsigned long functionsMemoryEngine(void);
unsigned long functionsNum(void);
unsigned long functionsLibNum(void);
dict* functionsLibGet(void);

View File

@ -4,8 +4,13 @@
* Copyright (c) 2014-Present, Redis Ltd.
* All rights reserved.
*
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
*
* Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
*/
#include "server.h"
@ -13,6 +18,13 @@
#include <stdint.h>
#include <math.h>
#ifdef HAVE_AVX2
/* Define __MM_MALLOC_H to prevent importing the memory aligned
* allocation functions, which we don't use. */
#define __MM_MALLOC_H
#include <immintrin.h>
#endif
/* The Redis HyperLogLog implementation is based on the following ideas:
*
* * The use of a 64 bit hash function as proposed in [1], in order to estimate
@ -186,6 +198,13 @@ struct hllhdr {
static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected";
#ifdef HAVE_AVX2
static int simd_enabled = 1;
#define HLL_USE_AVX2 (simd_enabled && __builtin_cpu_supports("avx2"))
#else
#define HLL_USE_AVX2 0
#endif
/* =========================== Low level bit macros ========================= */
/* Macros to access the dense representation.
@ -1041,6 +1060,132 @@ int hllAdd(robj *o, unsigned char *ele, size_t elesize) {
}
}
#ifdef HAVE_AVX2
/* A specialized version of hllMergeDense, optimized for default configurations.
*
* Requirements:
* 1) HLL_REGISTERS == 16384 && HLL_BITS == 6
* 2) The CPU supports AVX2 (checked at runtime in hllMergeDense)
*
* reg_raw: pointer to the raw representation array (16384 bytes, one byte per register)
* reg_dense: pointer to the dense representation array (12288 bytes, 6 bits per register)
*/
ATTRIBUTE_TARGET_AVX2
void hllMergeDenseAVX2(uint8_t *reg_raw, const uint8_t *reg_dense) {
const __m256i shuffle = _mm256_setr_epi8( //
4, 5, 6, -1, //
7, 8, 9, -1, //
10, 11, 12, -1, //
13, 14, 15, -1, //
0, 1, 2, -1, //
3, 4, 5, -1, //
6, 7, 8, -1, //
9, 10, 11, -1 //
);
/* Merge the first 8 registers (6 bytes) normally
* as the AVX2 algorithm needs 4 padding bytes at the start */
uint8_t val;
for (int i = 0; i < 8; i++) {
HLL_DENSE_GET_REGISTER(val, reg_dense, i);
if (val > reg_raw[i]) {
reg_raw[i] = val;
}
}
/* Dense to Raw:
*
* 4 registers in 3 bytes:
* {bbaaaaaa|ccccbbbb|ddddddcc}
*
* LOAD 32 bytes (32 registers) per iteration:
* 4(padding) + 12(16 registers) + 12(16 registers) + 4(padding)
* {XXXX|AAAB|BBCC|CDDD|EEEF|FFGG|GHHH|XXXX}
*
* SHUFFLE to:
* {AAA0|BBB0|CCC0|DDD0|EEE0|FFF0|GGG0|HHH0}
* {bbaaaaaa|ccccbbbb|ddddddcc|00000000} x8
*
* AVX2 is little endian, each of the 8 groups is a little-endian int32.
* A group (int32) contains 3 valid bytes (4 registers) and a zero byte.
*
* extract registers in each group with AND and SHIFT:
* {00aaaaaa|00000000|00000000|00000000} x8 (<<0)
* {00000000|00bbbbbb|00000000|00000000} x8 (<<2)
* {00000000|00000000|00cccccc|00000000} x8 (<<4)
* {00000000|00000000|00000000|00dddddd} x8 (<<6)
*
* merge the extracted registers with OR:
* {00aaaaaa|00bbbbbb|00cccccc|00dddddd} x8
*
* Finally, compute MAX(reg_raw, merged) and STORE it back to reg_raw
*/
/* Skip 8 registers (6 bytes) */
const uint8_t *r = reg_dense + 6 - 4;
uint8_t *t = reg_raw + 8;
for (int i = 0; i < HLL_REGISTERS / 32 - 1; ++i) {
__m256i x0, x;
x0 = _mm256_loadu_si256((__m256i *)r);
x = _mm256_shuffle_epi8(x0, shuffle);
__m256i a1, a2, a3, a4;
a1 = _mm256_and_si256(x, _mm256_set1_epi32(0x0000003f));
a2 = _mm256_and_si256(x, _mm256_set1_epi32(0x00000fc0));
a3 = _mm256_and_si256(x, _mm256_set1_epi32(0x0003f000));
a4 = _mm256_and_si256(x, _mm256_set1_epi32(0x00fc0000));
a2 = _mm256_slli_epi32(a2, 2);
a3 = _mm256_slli_epi32(a3, 4);
a4 = _mm256_slli_epi32(a4, 6);
__m256i y1, y2, y;
y1 = _mm256_or_si256(a1, a2);
y2 = _mm256_or_si256(a3, a4);
y = _mm256_or_si256(y1, y2);
__m256i z = _mm256_loadu_si256((__m256i *)t);
z = _mm256_max_epu8(z, y);
_mm256_storeu_si256((__m256i *)t, z);
r += 24;
t += 32;
}
/* Merge the last 24 registers normally
* as the AVX2 algorithm needs 4 padding bytes at the end */
for (int i = HLL_REGISTERS - 24; i < HLL_REGISTERS; i++) {
HLL_DENSE_GET_REGISTER(val, reg_dense, i);
if (val > reg_raw[i]) {
reg_raw[i] = val;
}
}
}
#endif
/* Merge dense-encoded registers to raw registers array. */
void hllMergeDense(uint8_t* reg_raw, const uint8_t* reg_dense) {
#ifdef HAVE_AVX2
if (HLL_REGISTERS == 16384 && HLL_BITS == 6) {
if (HLL_USE_AVX2) {
hllMergeDenseAVX2(reg_raw, reg_dense);
return;
}
}
#endif
uint8_t val;
for (int i = 0; i < HLL_REGISTERS; i++) {
HLL_DENSE_GET_REGISTER(val, reg_dense, i);
if (val > reg_raw[i]) {
reg_raw[i] = val;
}
}
}
/* Merge by computing MAX(registers[i],hll[i]) the HyperLogLog 'hll'
* with an array of uint8_t HLL_REGISTERS registers pointed by 'max'.
*
@ -1054,12 +1199,7 @@ int hllMerge(uint8_t *max, robj *hll) {
int i;
if (hdr->encoding == HLL_DENSE) {
uint8_t val;
for (i = 0; i < HLL_REGISTERS; i++) {
HLL_DENSE_GET_REGISTER(val,hdr->registers,i);
if (val > max[i]) max[i] = val;
}
hllMergeDense(max, hdr->registers);
} else {
uint8_t *p = hll->ptr, *end = p + sdslen(hll->ptr);
long runlen, regval;
@ -1091,6 +1231,117 @@ int hllMerge(uint8_t *max, robj *hll) {
return C_OK;
}
#ifdef HAVE_AVX2
/* A specialized version of hllDenseCompress, optimized for default configurations.
*
* Requirements:
* 1) HLL_REGISTERS == 16384 && HLL_BITS == 6
* 2) The CPU supports AVX2 (checked at runtime in hllDenseCompress)
*
* reg_dense: pointer to the dense representation array (12288 bytes, 6 bits per register)
* reg_raw: pointer to the raw representation array (16384 bytes, one byte per register)
*/
ATTRIBUTE_TARGET_AVX2
void hllDenseCompressAVX2(uint8_t *reg_dense, const uint8_t *reg_raw) {
const __m256i shuffle = _mm256_setr_epi8( //
0, 1, 2, //
4, 5, 6, //
8, 9, 10, //
12, 13, 14, //
-1, -1, -1, -1, //
0, 1, 2, //
4, 5, 6, //
8, 9, 10, //
12, 13, 14, //
-1, -1, -1, -1 //
);
/* Raw to Dense:
*
* LOAD 32 bytes (32 registers) per iteration:
* {00aaaaaa|00bbbbbb|00cccccc|00dddddd} x8
*
* AVX2 is little endian, each of the 8 groups is a little-endian int32.
* A group (int32) contains 4 registers.
*
* move the registers to correct positions with AND and SHIFT:
* {00aaaaaa|00000000|00000000|00000000} x8 (>>0)
* {bb000000|0000bbbb|00000000|00000000} x8 (>>2)
* {00000000|cccc0000|000000cc|00000000} x8 (>>4)
* {00000000|00000000|dddddd00|00000000} x8 (>>6)
*
* merge the registers with OR:
* {bbaaaaaa|ccccbbbb|ddddddcc|00000000} x8
* {AAA0|BBB0|CCC0|DDD0|EEE0|FFF0|GGG0|HHH0}
*
* SHUFFLE to:
* {AAAB|BBCC|CDDD|0000|EEEF|FFGG|GHHH|0000}
*
* STORE the lower half and higher half respectively:
* AAABBBCCCDDD0000
* EEEFFFGGGHHH0000
* AAABBBCCCDDDEEEFFFGGGHHH0000
*
* Note that the last 4 bytes are padding bytes.
*/
const uint8_t *r = reg_raw;
uint8_t *t = reg_dense;
for (int i = 0; i < HLL_REGISTERS / 32 - 1; ++i) {
__m256i x = _mm256_loadu_si256((__m256i *)r);
__m256i a1, a2, a3, a4;
a1 = _mm256_and_si256(x, _mm256_set1_epi32(0x0000003f));
a2 = _mm256_and_si256(x, _mm256_set1_epi32(0x00003f00));
a3 = _mm256_and_si256(x, _mm256_set1_epi32(0x003f0000));
a4 = _mm256_and_si256(x, _mm256_set1_epi32(0x3f000000));
a2 = _mm256_srli_epi32(a2, 2);
a3 = _mm256_srli_epi32(a3, 4);
a4 = _mm256_srli_epi32(a4, 6);
__m256i y1, y2, y;
y1 = _mm256_or_si256(a1, a2);
y2 = _mm256_or_si256(a3, a4);
y = _mm256_or_si256(y1, y2);
y = _mm256_shuffle_epi8(y, shuffle);
__m128i lower, higher;
lower = _mm256_castsi256_si128(y);
higher = _mm256_extracti128_si256(y, 1);
_mm_storeu_si128((__m128i *)t, lower);
_mm_storeu_si128((__m128i *)(t + 12), higher);
r += 32;
t += 24;
}
/* Merge the last 32 registers normally
* as the AVX2 algorithm needs 4 padding bytes at the end */
for (int i = HLL_REGISTERS - 32; i < HLL_REGISTERS; i++) {
HLL_DENSE_SET_REGISTER(reg_dense, i, reg_raw[i]);
}
}
#endif
/* Compress raw registers to dense representation. */
void hllDenseCompress(uint8_t *reg_dense, const uint8_t *reg_raw) {
#ifdef HAVE_AVX2
if (HLL_REGISTERS == 16384 && HLL_BITS == 6) {
if (HLL_USE_AVX2) {
hllDenseCompressAVX2(reg_dense, reg_raw);
return;
}
}
#endif
for (int i = 0; i < HLL_REGISTERS; i++) {
HLL_DENSE_SET_REGISTER(reg_dense, i, reg_raw[i]);
}
}
/* ========================== HyperLogLog commands ========================== */
/* Create an HLL object. We always create the HLL using sparse encoding.
@ -1350,14 +1601,19 @@ void pfmergeCommand(client *c) {
/* Write the resulting HLL to the destination HLL registers and
* invalidate the cached value. */
if (use_dense) {
hdr = o->ptr;
hllDenseCompress(hdr->registers, max);
} else {
for (j = 0; j < HLL_REGISTERS; j++) {
if (max[j] == 0) continue;
hdr = o->ptr;
switch(hdr->encoding) {
switch (hdr->encoding) {
case HLL_DENSE: hllDenseSet(hdr->registers,j,max[j]); break;
case HLL_SPARSE: hllSparseSet(o,j,max[j]); break;
}
}
}
hdr = o->ptr; /* o->ptr may be different now, as a side effect of
last hllSparseSet() call. */
HLL_INVALIDATE_CACHE(hdr);
@ -1484,6 +1740,7 @@ cleanup:
* PFDEBUG DECODE <key>
* PFDEBUG ENCODING <key>
* PFDEBUG TODENSE <key>
* PFDEBUG SIMD (ON|OFF)
*/
void pfdebugCommand(client *c) {
char *cmd = c->argv[1]->ptr;
@ -1491,6 +1748,26 @@ void pfdebugCommand(client *c) {
robj *o;
int j;
if (!strcasecmp(cmd, "simd")) {
if (c->argc != 3) goto arityerr;
if (!strcasecmp(c->argv[2]->ptr, "on")) {
#ifdef HAVE_AVX2
simd_enabled = 1;
#endif
} else if (!strcasecmp(c->argv[2]->ptr, "off")) {
#ifdef HAVE_AVX2
simd_enabled = 0;
#endif
} else {
addReplyError(c, "Argument must be ON or OFF");
}
addReplyStatus(c, HLL_USE_AVX2 ? "enabled" : "disabled");
return;
}
o = lookupKeyWrite(c->db,c->argv[2]);
if (o == NULL) {
addReplyError(c,"The specified key does not exist");

631
src/iothread.c Normal file
View File

@ -0,0 +1,631 @@
/* iothread.c -- The threaded io implementation.
*
* Copyright (c) 2024-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
*/
#include "server.h"
/* IO threads. */
static IOThread IOThreads[IO_THREADS_MAX_NUM];
/* For main thread */
static list *mainThreadPendingClientsToIOThreads[IO_THREADS_MAX_NUM]; /* Clients to IO threads */
static list *mainThreadProcessingClients[IO_THREADS_MAX_NUM]; /* Clients in processing */
static list *mainThreadPendingClients[IO_THREADS_MAX_NUM]; /* Pending clients from IO threads */
static pthread_mutex_t mainThreadPendingClientsMutexes[IO_THREADS_MAX_NUM]; /* Mutex for pending clients */
static eventNotifier* mainThreadPendingClientsNotifiers[IO_THREADS_MAX_NUM]; /* Notifier for pending clients */
/* When IO threads read a complete query of clients or want to free clients, it
* should remove it from its clients list and put the client in the list to main
* thread, we will send these clients to main thread in IOThreadBeforeSleep. */
void enqueuePendingClientsToMainThread(client *c, int unbind) {
/* If the IO thread may no longer manage it, such as closing client, we should
* unbind client from event loop, so main thread doesn't need to do it costly. */
if (unbind) connUnbindEventLoop(c->conn);
/* Just skip if it already is transferred. */
if (c->io_thread_client_list_node) {
listDelNode(IOThreads[c->tid].clients, c->io_thread_client_list_node);
c->io_thread_client_list_node = NULL;
/* Disable read and write to avoid race when main thread processes. */
c->io_flags &= ~(CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED);
listAddNodeTail(IOThreads[c->tid].pending_clients_to_main_thread, c);
}
}
/* Unbind connection of client from io thread event loop, write and read handlers
* also be removed, ensures that we can operate the client safely. */
void unbindClientFromIOThreadEventLoop(client *c) {
serverAssert(c->tid != IOTHREAD_MAIN_THREAD_ID &&
c->running_tid == IOTHREAD_MAIN_THREAD_ID);
if (!connHasEventLoop(c->conn)) return;
/* As calling in main thread, we should pause the io thread to make it safe. */
pauseIOThread(c->tid);
connUnbindEventLoop(c->conn);
resumeIOThread(c->tid);
}
/* When main thread is processing a client from IO thread, and wants to keep it,
* we should unbind connection of client from io thread event loop first,
* and then bind the client connection into server's event loop. */
void keepClientInMainThread(client *c) {
serverAssert(c->tid != IOTHREAD_MAIN_THREAD_ID &&
c->running_tid == IOTHREAD_MAIN_THREAD_ID);
/* IO thread no longer manage it. */
server.io_threads_clients_num[c->tid]--;
/* Unbind connection of client from io thread event loop. */
unbindClientFromIOThreadEventLoop(c);
/* Let main thread to run it, rebind event loop and read handler */
connRebindEventLoop(c->conn, server.el);
connSetReadHandler(c->conn, readQueryFromClient);
c->io_flags |= CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED;
c->running_tid = IOTHREAD_MAIN_THREAD_ID;
c->tid = IOTHREAD_MAIN_THREAD_ID;
/* Main thread starts to manage it. */
server.io_threads_clients_num[c->tid]++;
}
/* If the client is managed by IO thread, we should fetch it from IO thread
* and then main thread will can process it. Just like IO Thread transfers
* the client to the main thread for processing. */
void fetchClientFromIOThread(client *c) {
serverAssert(c->tid != IOTHREAD_MAIN_THREAD_ID &&
c->running_tid != IOTHREAD_MAIN_THREAD_ID);
pauseIOThread(c->tid);
/* Remove the client from clients list of IO thread or main thread. */
if (c->io_thread_client_list_node) {
listDelNode(IOThreads[c->tid].clients, c->io_thread_client_list_node);
c->io_thread_client_list_node = NULL;
} else {
list *clients[5] = {
IOThreads[c->tid].pending_clients,
IOThreads[c->tid].pending_clients_to_main_thread,
mainThreadPendingClients[c->tid],
mainThreadProcessingClients[c->tid],
mainThreadPendingClientsToIOThreads[c->tid]
};
for (int i = 0; i < 5; i++) {
listNode *ln = listSearchKey(clients[i], c);
if (ln) {
listDelNode(clients[i], ln);
/* Client only can be in one client list. */
break;
}
}
}
/* Unbind connection of client from io thread event loop. */
connUnbindEventLoop(c->conn);
/* Now main thread can process it. */
c->running_tid = IOTHREAD_MAIN_THREAD_ID;
resumeIOThread(c->tid);
}
/* For some clients, we must handle them in the main thread, since there is
* data race to be processed in IO threads.
*
* - Close ASAP, we must free the client in main thread.
* - Replica, pubsub, monitor, blocked, tracking clients, main thread may
* directly write them a reply when conditions are met.
* - Script command with debug may operate connection directly. */
int isClientMustHandledByMainThread(client *c) {
if (c->flags & (CLIENT_CLOSE_ASAP | CLIENT_MASTER | CLIENT_SLAVE |
CLIENT_PUBSUB | CLIENT_MONITOR | CLIENT_BLOCKED |
CLIENT_UNBLOCKED | CLIENT_TRACKING | CLIENT_LUA_DEBUG |
CLIENT_LUA_DEBUG_SYNC))
{
return 1;
}
return 0;
}
/* When the main thread accepts a new client or transfers clients to IO threads,
* it assigns the client to the IO thread with the fewest clients. */
void assignClientToIOThread(client *c) {
serverAssert(c->tid == IOTHREAD_MAIN_THREAD_ID);
/* Find the IO thread with the fewest clients. */
int min_id = 0;
int min = INT_MAX;
for (int i = 1; i < server.io_threads_num; i++) {
if (server.io_threads_clients_num[i] < min) {
min = server.io_threads_clients_num[i];
min_id = i;
}
}
/* Assign the client to the IO thread. */
server.io_threads_clients_num[c->tid]--;
c->tid = min_id;
c->running_tid = min_id;
server.io_threads_clients_num[min_id]++;
/* Unbind connection of client from main thread event loop, disable read and
* write, and then put it in the list, main thread will send these clients
* to IO thread in beforeSleep. */
connUnbindEventLoop(c->conn);
c->io_flags &= ~(CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED);
listAddNodeTail(mainThreadPendingClientsToIOThreads[c->tid], c);
}
/* If updating maxclients config, we not only resize the event loop of main thread
* but also resize the event loop of all io threads, and if one thread is failed,
* it is failed totally, since a fd can be distributed into any IO thread. */
int resizeAllIOThreadsEventLoops(size_t newsize) {
int result = AE_OK;
if (server.io_threads_num <= 1) return result;
/* To make context safe. */
pauseAllIOThreads();
for (int i = 1; i < server.io_threads_num; i++) {
IOThread *t = &IOThreads[i];
if (aeResizeSetSize(t->el, newsize) == AE_ERR)
result = AE_ERR;
}
resumeAllIOThreads();
return result;
}
/* In the main thread, we may want to operate data of io threads, maybe uninstall
* event handler, access query/output buffer or resize event loop, we need a clean
* and safe context to do that. We pause io thread in IOThreadBeforeSleep, do some
* jobs and then resume it. To avoid thread suspended, we use busy waiting to confirm
* the target status. Besides we use atomic variable to make sure memory visibility
* and ordering.
*
* Make sure that only the main thread can call these function,
* - pauseIOThread, resumeIOThread
* - pauseAllIOThreads, resumeAllIOThreads
* - pauseIOThreadsRange, resumeIOThreadsRange
*
* The main thread will pause the io thread, and then wait for the io thread to
* be paused. The io thread will check the paused status in IOThreadBeforeSleep,
* and then pause itself.
*
* The main thread will resume the io thread, and then wait for the io thread to
* be resumed. The io thread will check the paused status in IOThreadBeforeSleep,
* and then resume itself.
*/
/* We may pause the same io thread nestedly, so we need to record the times of
* pausing, and only when the times of pausing is 0, we can pause the io thread,
* and only when the times of pausing is 1, we can resume the io thread. */
static int PausedIOThreads[IO_THREADS_MAX_NUM] = {0};
/* Pause the specific range of io threads, and wait for them to be paused. */
void pauseIOThreadsRange(int start, int end) {
if (!server.io_threads_active) return;
serverAssert(start >= 1 && end < server.io_threads_num && start <= end);
serverAssert(pthread_equal(pthread_self(), server.main_thread_id));
/* Try to make all io threads paused in parallel */
for (int i = start; i <= end; i++) {
PausedIOThreads[i]++;
/* Skip if already paused */
if (PausedIOThreads[i] > 1) continue;
int paused;
atomicGetWithSync(IOThreads[i].paused, paused);
/* Don't support to call reentrant */
serverAssert(paused == IO_THREAD_UNPAUSED);
atomicSetWithSync(IOThreads[i].paused, IO_THREAD_PAUSING);
/* Just notify io thread, no actual job, since io threads check paused
* status in IOThreadBeforeSleep, so just wake it up if polling wait. */
triggerEventNotifier(IOThreads[i].pending_clients_notifier);
}
/* Wait for all io threads paused */
for (int i = start; i <= end; i++) {
if (PausedIOThreads[i] > 1) continue;
int paused = IO_THREAD_PAUSING;
while (paused != IO_THREAD_PAUSED) {
atomicGetWithSync(IOThreads[i].paused, paused);
}
}
}
/* Resume the specific range of io threads, and wait for them to be resumed. */
void resumeIOThreadsRange(int start, int end) {
if (!server.io_threads_active) return;
serverAssert(start >= 1 && end < server.io_threads_num && start <= end);
serverAssert(pthread_equal(pthread_self(), server.main_thread_id));
for (int i = start; i <= end; i++) {
serverAssert(PausedIOThreads[i] > 0);
PausedIOThreads[i]--;
if (PausedIOThreads[i] > 0) continue;
int paused;
/* Check if it is paused, since we must call 'pause' and
* 'resume' in pairs */
atomicGetWithSync(IOThreads[i].paused, paused);
serverAssert(paused == IO_THREAD_PAUSED);
/* Resume */
atomicSetWithSync(IOThreads[i].paused, IO_THREAD_RESUMING);
while (paused != IO_THREAD_UNPAUSED) {
atomicGetWithSync(IOThreads[i].paused, paused);
}
}
}
/* The IO thread checks whether it is being paused, and if so, it pauses itself
* and waits for resuming, corresponding to the pause/resumeIOThread* functions.
* Currently, this is only called in IOThreadBeforeSleep, as there are no pending
* I/O events at this point, with a clean context. */
void handlePauseAndResume(IOThread *t) {
int paused;
/* Check if i am being paused. */
atomicGetWithSync(t->paused, paused);
if (paused == IO_THREAD_PAUSING) {
atomicSetWithSync(t->paused, IO_THREAD_PAUSED);
/* Wait for resuming */
while (paused != IO_THREAD_RESUMING) {
atomicGetWithSync(t->paused, paused);
}
atomicSetWithSync(t->paused, IO_THREAD_UNPAUSED);
}
}
/* Pause the specific io thread, and wait for it to be paused. */
void pauseIOThread(int id) {
pauseIOThreadsRange(id, id);
}
/* Resume the specific io thread, and wait for it to be resumed. */
void resumeIOThread(int id) {
resumeIOThreadsRange(id, id);
}
/* Pause all io threads, and wait for them to be paused. */
void pauseAllIOThreads(void) {
pauseIOThreadsRange(1, server.io_threads_num-1);
}
/* Resume all io threads, and wait for them to be resumed. */
void resumeAllIOThreads(void) {
resumeIOThreadsRange(1, server.io_threads_num-1);
}
/* Add the pending clients to the list of IO threads, and trigger an event to
* notify io threads to handle. */
int sendPendingClientsToIOThreads(void) {
int processed = 0;
for (int i = 1; i < server.io_threads_num; i++) {
int len = listLength(mainThreadPendingClientsToIOThreads[i]);
if (len > 0) {
IOThread *t = &IOThreads[i];
pthread_mutex_lock(&t->pending_clients_mutex);
listJoin(t->pending_clients, mainThreadPendingClientsToIOThreads[i]);
pthread_mutex_unlock(&t->pending_clients_mutex);
/* Trigger an event, maybe an error is returned when buffer is full
* if using pipe, but no worry, io thread will handle all clients
* in list when receiving a notification. */
triggerEventNotifier(t->pending_clients_notifier);
}
processed += len;
}
return processed;
}
extern int ProcessingEventsWhileBlocked;
/* The main thread processes the clients from IO threads, these clients may have
* a complete command to execute or need to be freed. Note that IO threads never
* free client since this operation access much server data.
*
* Please notice that this function may be called reentrantly, i,e, the same goes
* for handleClientsFromIOThread and processClientsOfAllIOThreads. For example,
* when processing script command, it may call processEventsWhileBlocked to
* process new events, if the clients with fired events from the same io thread,
* it may call this function reentrantly. */
void processClientsFromIOThread(IOThread *t) {
listNode *node = NULL;
while (listLength(mainThreadProcessingClients[t->id])) {
/* Each time we pop up only the first client to process to guarantee
* reentrancy safety. */
if (node) zfree(node);
node = listFirst(mainThreadProcessingClients[t->id]);
listUnlinkNode(mainThreadProcessingClients[t->id], node);
client *c = listNodeValue(node);
/* Make sure the client is readable or writable in io thread to
* avoid data race. */
serverAssert(!(c->io_flags & (CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED)));
serverAssert(!(c->flags & CLIENT_CLOSE_ASAP));
/* Let main thread to run it, set running thread id first. */
c->running_tid = IOTHREAD_MAIN_THREAD_ID;
/* If a read error occurs, handle it in the main thread first, since we
* want to print logs about client information before freeing. */
if (c->read_error) handleClientReadError(c);
/* The client is asked to close in IO thread. */
if (c->io_flags & CLIENT_IO_CLOSE_ASAP) {
freeClient(c);
continue;
}
/* Update the client in the mem usage */
updateClientMemUsageAndBucket(c);
/* Process the pending command and input buffer. */
if (!c->read_error && c->io_flags & CLIENT_IO_PENDING_COMMAND) {
c->flags |= CLIENT_PENDING_COMMAND;
if (processPendingCommandAndInputBuffer(c) == C_ERR) {
/* If the client is no longer valid, it must be freed safely. */
continue;
}
}
/* We may have pending replies if io thread may not finish writing
* reply to client, so we did not put the client in pending write
* queue. And we should do that first since we may keep the client
* in main thread instead of returning to io threads. */
if (!(c->flags & CLIENT_PENDING_WRITE) && clientHasPendingReplies(c))
putClientInPendingWriteQueue(c);
/* The client only can be processed in the main thread, otherwise data
* race will happen, since we may touch client's data in main thread. */
if (isClientMustHandledByMainThread(c)) {
keepClientInMainThread(c);
continue;
}
/* Remove this client from pending write clients queue of main thread,
* And some clients may do not have reply if CLIENT REPLY OFF/SKIP. */
if (c->flags & CLIENT_PENDING_WRITE) {
c->flags &= ~CLIENT_PENDING_WRITE;
listUnlinkNode(server.clients_pending_write, &c->clients_pending_write_node);
}
c->running_tid = c->tid;
listLinkNodeHead(mainThreadPendingClientsToIOThreads[c->tid], node);
node = NULL;
}
if (node) zfree(node);
/* Trigger the io thread to handle these clients ASAP to make them processed
* in parallel.
*
* If AOF fsync policy is always, we should not let io thread handle these
* clients now since we don't flush AOF buffer to file and sync yet.
* So these clients will be delayed to send io threads in beforeSleep after
* flushAppendOnlyFile.
*
* If we are in processEventsWhileBlocked, we don't send clients to io threads
* now, we want to update server.events_processed_while_blocked accurately. */
if (listLength(mainThreadPendingClientsToIOThreads[t->id]) &&
server.aof_fsync != AOF_FSYNC_ALWAYS &&
!ProcessingEventsWhileBlocked)
{
pthread_mutex_lock(&(t->pending_clients_mutex));
listJoin(t->pending_clients, mainThreadPendingClientsToIOThreads[t->id]);
pthread_mutex_unlock(&(t->pending_clients_mutex));
triggerEventNotifier(t->pending_clients_notifier);
}
}
/* When the io thread finishes processing the client with the read event, it will
* notify the main thread through event triggering in IOThreadBeforeSleep. The main
* thread handles the event through this function. */
void handleClientsFromIOThread(struct aeEventLoop *el, int fd, void *ptr, int mask) {
UNUSED(el);
UNUSED(mask);
IOThread *t = ptr;
/* Handle fd event first. */
serverAssert(fd == getReadEventFd(mainThreadPendingClientsNotifiers[t->id]));
handleEventNotifier(mainThreadPendingClientsNotifiers[t->id]);
/* Get the list of clients to process. */
pthread_mutex_lock(&mainThreadPendingClientsMutexes[t->id]);
listJoin(mainThreadProcessingClients[t->id], mainThreadPendingClients[t->id]);
pthread_mutex_unlock(&mainThreadPendingClientsMutexes[t->id]);
if (listLength(mainThreadProcessingClients[t->id]) == 0) return;
/* Process the clients from IO threads. */
processClientsFromIOThread(t);
}
/* In the new threaded io design, one thread may process multiple clients, so when
* an io thread notifies the main thread of an event, there may be multiple clients
* with commands that need to be processed. But in the event handler function
* handleClientsFromIOThread may be blocked when processing the specific command,
* the previous clients can not get a reply, and the subsequent clients can not be
* processed, so we need to handle this scenario in beforeSleep. The function is to
* process the commands of subsequent clients from io threads. And another function
* sendPendingClientsToIOThreads make sure clients from io thread can get replies.
* See also beforeSleep. */
void processClientsOfAllIOThreads(void) {
for (int i = 1; i < server.io_threads_num; i++) {
processClientsFromIOThread(&IOThreads[i]);
}
}
/* After the main thread processes the clients, it will send the clients back to
* io threads to handle, and fire an event, the io thread handles the event by
* this function. If the client is not binded to the event loop, we should bind
* it first and install read handler, and we don't uninstall client read handler
* unless freeing client. If the client has pending reply, we just reply to client
* first, and then install write handler if needed. */
void handleClientsFromMainThread(struct aeEventLoop *ae, int fd, void *ptr, int mask) {
UNUSED(ae);
UNUSED(mask);
IOThread *t = ptr;
/* Handle fd event first. */
serverAssert(fd == getReadEventFd(t->pending_clients_notifier));
handleEventNotifier(t->pending_clients_notifier);
pthread_mutex_lock(&t->pending_clients_mutex);
listJoin(t->processing_clients, t->pending_clients);
pthread_mutex_unlock(&t->pending_clients_mutex);
if (listLength(t->processing_clients) == 0) return;
listIter li;
listNode *ln;
listRewind(t->processing_clients, &li);
while((ln = listNext(&li))) {
client *c = listNodeValue(ln);
serverAssert(!(c->io_flags & (CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED)));
/* Main thread must handle clients with CLIENT_CLOSE_ASAP flag, since
* we only set io_flags when clients in io thread are freed ASAP. */
serverAssert(!(c->flags & CLIENT_CLOSE_ASAP));
/* Link client in IO thread clients list first. */
serverAssert(c->io_thread_client_list_node == NULL);
listAddNodeTail(t->clients, c);
c->io_thread_client_list_node = listLast(t->clients);
/* The client is asked to close, we just let main thread free it. */
if (c->io_flags & CLIENT_IO_CLOSE_ASAP) {
enqueuePendingClientsToMainThread(c, 1);
continue;
}
/* Enable read and write and reset some flags. */
c->io_flags |= CLIENT_IO_READ_ENABLED | CLIENT_IO_WRITE_ENABLED;
c->io_flags &= ~CLIENT_IO_PENDING_COMMAND;
/* Only bind once, we never remove read handler unless freeing client. */
if (!connHasEventLoop(c->conn)) {
connRebindEventLoop(c->conn, t->el);
serverAssert(!connHasReadHandler(c->conn));
connSetReadHandler(c->conn, readQueryFromClient);
}
/* If the client has pending replies, write replies to client. */
if (clientHasPendingReplies(c)) {
writeToClient(c, 0);
if (!(c->io_flags & CLIENT_IO_CLOSE_ASAP) && clientHasPendingReplies(c)) {
connSetWriteHandler(c->conn, sendReplyToClient);
}
}
}
listEmpty(t->processing_clients);
}
void IOThreadBeforeSleep(struct aeEventLoop *el) {
IOThread *t = el->privdata[0];
/* Handle pending data(typical TLS). */
connTypeProcessPendingData(el);
/* If any connection type(typical TLS) still has pending unread data don't sleep at all. */
aeSetDontWait(el, connTypeHasPendingData(el));
/* Check if i am being paused, pause myself and resume. */
handlePauseAndResume(t);
/* Check if there are clients to be processed in main thread, and then join
* them to the list of main thread. */
if (listLength(t->pending_clients_to_main_thread) > 0) {
pthread_mutex_lock(&mainThreadPendingClientsMutexes[t->id]);
listJoin(mainThreadPendingClients[t->id], t->pending_clients_to_main_thread);
pthread_mutex_unlock(&mainThreadPendingClientsMutexes[t->id]);
/* Trigger an event, maybe an error is returned when buffer is full
* if using pipe, but no worry, main thread will handle all clients
* in list when receiving a notification. */
triggerEventNotifier(mainThreadPendingClientsNotifiers[t->id]);
}
}
/* The main function of IO thread, it will run an event loop. The mian thread
* and IO thread will communicate through event notifier. */
void *IOThreadMain(void *ptr) {
IOThread *t = ptr;
char thdname[16];
snprintf(thdname, sizeof(thdname), "io_thd_%d", t->id);
redis_set_thread_title(thdname);
redisSetCpuAffinity(server.server_cpulist);
makeThreadKillable();
aeSetBeforeSleepProc(t->el, IOThreadBeforeSleep);
aeMain(t->el);
return NULL;
}
/* Initialize the data structures needed for threaded I/O. */
void initThreadedIO(void) {
if (server.io_threads_num <= 1) return;
server.io_threads_active = 1;
if (server.io_threads_num > IO_THREADS_MAX_NUM) {
serverLog(LL_WARNING,"Fatal: too many I/O threads configured. "
"The maximum number is %d.", IO_THREADS_MAX_NUM);
exit(1);
}
/* Spawn and initialize the I/O threads. */
for (int i = 1; i < server.io_threads_num; i++) {
IOThread *t = &IOThreads[i];
t->id = i;
t->el = aeCreateEventLoop(server.maxclients+CONFIG_FDSET_INCR);
t->el->privdata[0] = t;
t->pending_clients = listCreate();
t->processing_clients = listCreate();
t->pending_clients_to_main_thread = listCreate();
t->clients = listCreate();
atomicSetWithSync(t->paused, IO_THREAD_UNPAUSED);
pthread_mutexattr_t *attr = NULL;
#if defined(__linux__) && defined(__GLIBC__)
attr = zmalloc(sizeof(pthread_mutexattr_t));
pthread_mutexattr_init(attr);
pthread_mutexattr_settype(attr, PTHREAD_MUTEX_ADAPTIVE_NP);
#endif
pthread_mutex_init(&t->pending_clients_mutex, attr);
t->pending_clients_notifier = createEventNotifier();
if (aeCreateFileEvent(t->el, getReadEventFd(t->pending_clients_notifier),
AE_READABLE, handleClientsFromMainThread, t) != AE_OK)
{
serverLog(LL_WARNING, "Fatal: Can't register file event for IO thread notifications.");
exit(1);
}
/* Create IO thread */
if (pthread_create(&t->tid, NULL, IOThreadMain, (void*)t) != 0) {
serverLog(LL_WARNING, "Fatal: Can't initialize IO thread.");
exit(1);
}
/* For main thread */
mainThreadPendingClientsToIOThreads[i] = listCreate();
mainThreadPendingClients[i] = listCreate();
mainThreadProcessingClients[i] = listCreate();
pthread_mutex_init(&mainThreadPendingClientsMutexes[i], attr);
mainThreadPendingClientsNotifiers[i] = createEventNotifier();
if (aeCreateFileEvent(server.el, getReadEventFd(mainThreadPendingClientsNotifiers[i]),
AE_READABLE, handleClientsFromIOThread, t) != AE_OK)
{
serverLog(LL_WARNING, "Fatal: Can't register file event for main thread notifications.");
exit(1);
}
if (attr) zfree(attr);
}
}
/* Kill the IO threads, TODO: release the applied resources. */
void killIOThreads(void) {
if (server.io_threads_num <= 1) return;
int err, j;
for (j = 1; j < server.io_threads_num; j++) {
if (IOThreads[j].tid == pthread_self()) continue;
if (IOThreads[j].tid && pthread_cancel(IOThreads[j].tid) == 0) {
if ((err = pthread_join(IOThreads[j].tid,NULL)) != 0) {
serverLog(LL_WARNING,
"IO thread(tid:%lu) can not be joined: %s",
(unsigned long)IOThreads[j].tid, strerror(err));
} else {
serverLog(LL_WARNING,
"IO thread(tid:%lu) terminated",(unsigned long)IOThreads[j].tid);
}
}
}
}

View File

@ -42,6 +42,7 @@ struct _kvstore {
unsigned long long *dict_size_index; /* Binary indexed tree (BIT) that describes cumulative key frequencies up until given dict-index. */
size_t overhead_hashtable_lut; /* The overhead of all dictionaries. */
size_t overhead_hashtable_rehashing; /* The overhead of dictionaries rehashing. */
void *metadata[]; /* conditionally allocated based on "flags" */
};
/* Structure for kvstore iterator that allows iterating across multiple dicts. */
@ -59,10 +60,17 @@ struct _kvstoreDictIterator {
dictIterator di;
};
/* Dict metadata for database, used for record the position in rehashing list. */
/* Basic metadata allocated per dict */
typedef struct {
listNode *rehashing_node; /* list node in rehashing list */
} kvstoreDictMetadata;
} kvstoreDictMetaBase;
/* Conditionally metadata allocated per dict (specifically for keysizes histogram) */
typedef struct {
kvstoreDictMetaBase base; /* must be first in struct ! */
/* External metadata */
kvstoreDictMetadata meta;
} kvstoreDictMetaEx;
/**********************************/
/*** Helpers **********************/
@ -184,7 +192,7 @@ static void freeDictIfNeeded(kvstore *kvs, int didx) {
* If there's one dict, bucket count can be retrieved directly from single dict bucket. */
static void kvstoreDictRehashingStarted(dict *d) {
kvstore *kvs = d->type->userdata;
kvstoreDictMetadata *metadata = (kvstoreDictMetadata *)dictMetadata(d);
kvstoreDictMetaBase *metadata = (kvstoreDictMetaBase *)dictMetadata(d);
listAddNodeTail(kvs->rehashing, d);
metadata->rehashing_node = listLast(kvs->rehashing);
@ -201,7 +209,7 @@ static void kvstoreDictRehashingStarted(dict *d) {
* the old ht size of the dictionary from the total sum of buckets for a DB. */
static void kvstoreDictRehashingCompleted(dict *d) {
kvstore *kvs = d->type->userdata;
kvstoreDictMetadata *metadata = (kvstoreDictMetadata *)dictMetadata(d);
kvstoreDictMetaBase *metadata = (kvstoreDictMetaBase *)dictMetadata(d);
if (metadata->rehashing_node) {
listDelNode(kvs->rehashing, metadata->rehashing_node);
metadata->rehashing_node = NULL;
@ -214,10 +222,15 @@ static void kvstoreDictRehashingCompleted(dict *d) {
kvs->overhead_hashtable_rehashing -= from;
}
/* Returns the size of the DB dict metadata in bytes. */
static size_t kvstoreDictMetadataSize(dict *d) {
/* Returns the size of the DB dict base metadata in bytes. */
static size_t kvstoreDictMetaBaseSize(dict *d) {
UNUSED(d);
return sizeof(kvstoreDictMetadata);
return sizeof(kvstoreDictMetaBase);
}
/* Returns the size of the DB dict extended metadata in bytes. */
static size_t kvstoreDictMetadataExtendSize(dict *d) {
UNUSED(d);
return sizeof(kvstoreDictMetaEx);
}
/**********************************/
@ -232,7 +245,13 @@ kvstore *kvstoreCreate(dictType *type, int num_dicts_bits, int flags) {
* for the dict cursor, see kvstoreScan */
assert(num_dicts_bits <= 16);
kvstore *kvs = zcalloc(sizeof(*kvs));
/* Calc kvstore size */
size_t kvsize = sizeof(kvstore);
/* Conditionally calc also histogram size */
if (flags & KVSTORE_ALLOC_META_KEYS_HIST)
kvsize += sizeof(kvstoreMetadata);
kvstore *kvs = zcalloc(kvsize);
memcpy(&kvs->dtype, type, sizeof(kvs->dtype));
kvs->flags = flags;
@ -243,7 +262,10 @@ kvstore *kvstoreCreate(dictType *type, int num_dicts_bits, int flags) {
assert(!type->rehashingStarted);
assert(!type->rehashingCompleted);
kvs->dtype.userdata = kvs;
kvs->dtype.dictMetadataBytes = kvstoreDictMetadataSize;
if (flags & KVSTORE_ALLOC_META_KEYS_HIST)
kvs->dtype.dictMetadataBytes = kvstoreDictMetadataExtendSize;
else
kvs->dtype.dictMetadataBytes = kvstoreDictMetaBaseSize;
kvs->dtype.rehashingStarted = kvstoreDictRehashingStarted;
kvs->dtype.rehashingCompleted = kvstoreDictRehashingCompleted;
@ -263,7 +285,6 @@ kvstore *kvstoreCreate(dictType *type, int num_dicts_bits, int flags) {
kvs->bucket_count = 0;
kvs->overhead_hashtable_lut = 0;
kvs->overhead_hashtable_rehashing = 0;
return kvs;
}
@ -272,9 +293,13 @@ void kvstoreEmpty(kvstore *kvs, void(callback)(dict*)) {
dict *d = kvstoreGetDict(kvs, didx);
if (!d)
continue;
kvstoreDictMetadata *metadata = (kvstoreDictMetadata *)dictMetadata(d);
kvstoreDictMetaBase *metadata = (kvstoreDictMetaBase *)dictMetadata(d);
if (metadata->rehashing_node)
metadata->rehashing_node = NULL;
if (kvs->flags & KVSTORE_ALLOC_META_KEYS_HIST) {
kvstoreDictMetaEx *metaExt = (kvstoreDictMetaEx *) metadata;
memset(&metaExt->meta.keysizes_hist, 0, sizeof(metaExt->meta.keysizes_hist));
}
dictEmpty(d, callback);
freeDictIfNeeded(kvs, didx);
}
@ -296,7 +321,7 @@ void kvstoreRelease(kvstore *kvs) {
dict *d = kvstoreGetDict(kvs, didx);
if (!d)
continue;
kvstoreDictMetadata *metadata = (kvstoreDictMetadata *)dictMetadata(d);
kvstoreDictMetaBase *metadata = (kvstoreDictMetaBase *)dictMetadata(d);
if (metadata->rehashing_node)
metadata->rehashing_node = NULL;
dictRelease(d);
@ -330,11 +355,15 @@ unsigned long kvstoreBuckets(kvstore *kvs) {
size_t kvstoreMemUsage(kvstore *kvs) {
size_t mem = sizeof(*kvs);
size_t metaSize = sizeof(kvstoreDictMetaBase);
if (kvs->flags & KVSTORE_ALLOC_META_KEYS_HIST)
metaSize = sizeof(kvstoreDictMetaEx);
unsigned long long keys_count = kvstoreSize(kvs);
mem += keys_count * dictEntryMemUsage() +
kvstoreBuckets(kvs) * sizeof(dictEntry*) +
kvs->allocated_dicts * (sizeof(dict) + kvstoreDictMetadataSize(NULL));
kvs->allocated_dicts * (sizeof(dict) + metaSize);
/* Values are dict* shared with kvs->dicts */
mem += listLength(kvs->rehashing) * sizeof(listNode);
@ -737,12 +766,12 @@ dictEntry *kvstoreDictGetFairRandomKey(kvstore *kvs, int didx)
return dictGetFairRandomKey(d);
}
dictEntry *kvstoreDictFindEntryByPtrAndHash(kvstore *kvs, int didx, const void *oldptr, uint64_t hash)
dictEntry *kvstoreDictFindByHashAndPtr(kvstore *kvs, int didx, const void *oldptr, uint64_t hash)
{
dict *d = kvstoreGetDict(kvs, didx);
if (!d)
return NULL;
return dictFindEntryByPtrAndHash(d, oldptr, hash);
return dictFindByHashAndPtr(d, oldptr, hash);
}
unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count)
@ -785,7 +814,7 @@ void kvstoreDictLUTDefrag(kvstore *kvs, kvstoreDictLUTDefragFunction *defragfn)
/* After defragmenting the dict, update its corresponding
* rehashing node in the kvstore's rehashing list. */
kvstoreDictMetadata *metadata = (kvstoreDictMetadata *)dictMetadata(*d);
kvstoreDictMetaBase *metadata = (kvstoreDictMetaBase *)dictMetadata(*d);
if (metadata->rehashing_node)
metadata->rehashing_node->value = *d;
}
@ -856,6 +885,19 @@ int kvstoreDictDelete(kvstore *kvs, int didx, const void *key) {
return ret;
}
kvstoreDictMetadata *kvstoreGetDictMetadata(kvstore *kvs, int didx) {
dict *d = kvstoreGetDict(kvs, didx);
if ((!d) || (!(kvs->flags & KVSTORE_ALLOC_META_KEYS_HIST)))
return NULL;
kvstoreDictMetaEx *metadata = (kvstoreDictMetaEx *)dictMetadata(d);
return &(metadata->meta);
}
kvstoreMetadata *kvstoreGetMetadata(kvstore *kvs) {
return (kvstoreMetadata *) &kvs->metadata;
}
#ifdef REDIS_TEST
#include <stdio.h>
#include "testhelp.h"
@ -1029,7 +1071,8 @@ int kvstoreTest(int argc, char **argv, int flags) {
}
TEST("Verify non-empty dict count is correctly updated") {
kvstore *kvs = kvstoreCreate(&KvstoreDictTestType, 2, KVSTORE_ALLOCATE_DICTS_ON_DEMAND);
kvstore *kvs = kvstoreCreate(&KvstoreDictTestType, 2,
KVSTORE_ALLOCATE_DICTS_ON_DEMAND | KVSTORE_ALLOC_META_KEYS_HIST);
for (int idx = 0; idx < 4; idx++) {
for (i = 0; i < 16; i++) {
de = kvstoreDictAddRaw(kvs, idx, stringFromInt(i), NULL);

View File

@ -4,6 +4,21 @@
#include "dict.h"
#include "adlist.h"
/* maximum number of bins of keysizes histogram */
#define MAX_KEYSIZES_BINS 48
#define MAX_KEYSIZES_TYPES 5 /* static_assert at db.c verifies == OBJ_TYPE_BASIC_MAX */
/* When creating kvstore with flag `KVSTORE_ALLOC_META_KEYS_HIST`, then kvstore
* alloc and memset struct kvstoreMetadata on init, yet, managed outside kvstore */
typedef struct {
uint64_t keysizes_hist[MAX_KEYSIZES_TYPES][MAX_KEYSIZES_BINS];
} kvstoreMetadata;
/* Like kvstoreMetadata, this one per dict */
typedef struct {
uint64_t keysizes_hist[MAX_KEYSIZES_TYPES][MAX_KEYSIZES_BINS];
} kvstoreDictMetadata;
typedef struct _kvstore kvstore;
typedef struct _kvstoreIterator kvstoreIterator;
typedef struct _kvstoreDictIterator kvstoreDictIterator;
@ -13,6 +28,7 @@ typedef int (kvstoreExpandShouldSkipDictIndex)(int didx);
#define KVSTORE_ALLOCATE_DICTS_ON_DEMAND (1<<0)
#define KVSTORE_FREE_EMPTY_DICTS (1<<1)
#define KVSTORE_ALLOC_META_KEYS_HIST (1<<2) /* Alloc keysizes histogram */
kvstore *kvstoreCreate(dictType *type, int num_dicts_bits, int flags);
void kvstoreEmpty(kvstore *kvs, void(callback)(dict*));
void kvstoreRelease(kvstore *kvs);
@ -57,7 +73,7 @@ void kvstoreReleaseDictIterator(kvstoreDictIterator *kvs_id);
dictEntry *kvstoreDictIteratorNext(kvstoreDictIterator *kvs_di);
dictEntry *kvstoreDictGetRandomKey(kvstore *kvs, int didx);
dictEntry *kvstoreDictGetFairRandomKey(kvstore *kvs, int didx);
dictEntry *kvstoreDictFindEntryByPtrAndHash(kvstore *kvs, int didx, const void *oldptr, uint64_t hash);
dictEntry *kvstoreDictFindByHashAndPtr(kvstore *kvs, int didx, const void *oldptr, uint64_t hash);
unsigned int kvstoreDictGetSomeKeys(kvstore *kvs, int didx, dictEntry **des, unsigned int count);
int kvstoreDictExpand(kvstore *kvs, int didx, unsigned long size);
unsigned long kvstoreDictScanDefrag(kvstore *kvs, int didx, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata);
@ -71,6 +87,8 @@ void kvstoreDictSetVal(kvstore *kvs, int didx, dictEntry *de, void *val);
dictEntry *kvstoreDictTwoPhaseUnlinkFind(kvstore *kvs, int didx, const void *key, dictEntry ***plink, int *table_index);
void kvstoreDictTwoPhaseUnlinkFree(kvstore *kvs, int didx, dictEntry *he, dictEntry **plink, int table_index);
int kvstoreDictDelete(kvstore *kvs, int didx, const void *key);
kvstoreDictMetadata *kvstoreGetDictMetadata(kvstore *kvs, int didx);
kvstoreMetadata *kvstoreGetMetadata(kvstore *kvs);
#ifdef REDIS_TEST
int kvstoreTest(int argc, char *argv[], int flags);

View File

@ -203,7 +203,7 @@ sds createLatencyReport(void) {
if (dictSize(server.latency_events) == 0 &&
server.latency_monitor_threshold == 0)
{
report = sdscat(report,"I'm sorry, Dave, I can't do that. Latency monitoring is disabled in this Redis instance. You may use \"CONFIG SET latency-monitor-threshold <milliseconds>.\" in order to enable it. If we weren't in a deep space mission I'd suggest to take a look at https://redis.io/topics/latency-monitor.\n");
report = sdscat(report,"I'm sorry, Dave, I can't do that. Latency monitoring is disabled in this Redis instance. You may use \"CONFIG SET latency-monitor-threshold <milliseconds>.\" in order to enable it. If we weren't in a deep space mission I'd suggest to take a look at https://redis.io/docs/latest/operate/oss_and_stack/management/optimization/latency-monitor.\n");
return report;
}

View File

@ -207,7 +207,7 @@ void emptyDbAsync(redisDb *db) {
}
kvstore *oldkeys = db->keys, *oldexpires = db->expires;
ebuckets oldHfe = db->hexpires;
db->keys = kvstoreCreate(&dbDictType, slot_count_bits, flags);
db->keys = kvstoreCreate(&dbDictType, slot_count_bits, flags | KVSTORE_ALLOC_META_KEYS_HIST);
db->expires = kvstoreCreate(&dbExpiresDictType, slot_count_bits, flags);
db->hexpires = ebCreate();
atomicIncr(lazyfree_objects, kvstoreSize(oldkeys));

View File

@ -231,6 +231,11 @@ void lpFree(unsigned char *lp) {
lp_free(lp);
}
/* Generic version of lpFree. */
void lpFreeGeneric(void *lp) {
lp_free((unsigned char *)lp);
}
/* Shrink the memory to fit. */
unsigned char* lpShrinkToFit(unsigned char *lp) {
size_t size = lpGetTotalBytes(lp);
@ -369,6 +374,23 @@ static inline unsigned long lpEncodeBacklen(unsigned char *buf, uint64_t l) {
}
}
/* Calculate the number of bytes required to reverse-encode a variable length
* field representing the length of the previous element of size 'l', ranging
* from 1 to 5. */
static inline unsigned long lpEncodeBacklenBytes(uint64_t l) {
if (l <= 127) {
return 1;
} else if (l < 16383) {
return 2;
} else if (l < 2097151) {
return 3;
} else if (l < 268435455) {
return 4;
} else {
return 5;
}
}
/* Decode the backlen and returns it. If the encoding looks invalid (more than
* 5 bytes are used), UINT64_MAX is returned to report the problem. */
static inline uint64_t lpDecodeBacklen(unsigned char *p) {
@ -431,17 +453,17 @@ static inline uint32_t lpCurrentEncodedSizeUnsafe(unsigned char *p) {
* This includes just the encoding byte, and the bytes needed to encode the length
* of the element (excluding the element data itself)
* If the element encoding is wrong then 0 is returned. */
static inline uint32_t lpCurrentEncodedSizeBytes(unsigned char *p) {
if (LP_ENCODING_IS_7BIT_UINT(p[0])) return 1;
if (LP_ENCODING_IS_6BIT_STR(p[0])) return 1;
if (LP_ENCODING_IS_13BIT_INT(p[0])) return 1;
if (LP_ENCODING_IS_16BIT_INT(p[0])) return 1;
if (LP_ENCODING_IS_24BIT_INT(p[0])) return 1;
if (LP_ENCODING_IS_32BIT_INT(p[0])) return 1;
if (LP_ENCODING_IS_64BIT_INT(p[0])) return 1;
if (LP_ENCODING_IS_12BIT_STR(p[0])) return 2;
if (LP_ENCODING_IS_32BIT_STR(p[0])) return 5;
if (p[0] == LP_EOF) return 1;
static inline uint32_t lpCurrentEncodedSizeBytes(const unsigned char encoding) {
if (LP_ENCODING_IS_7BIT_UINT(encoding)) return 1;
if (LP_ENCODING_IS_6BIT_STR(encoding)) return 1;
if (LP_ENCODING_IS_13BIT_INT(encoding)) return 1;
if (LP_ENCODING_IS_16BIT_INT(encoding)) return 1;
if (LP_ENCODING_IS_24BIT_INT(encoding)) return 1;
if (LP_ENCODING_IS_32BIT_INT(encoding)) return 1;
if (LP_ENCODING_IS_64BIT_INT(encoding)) return 1;
if (LP_ENCODING_IS_12BIT_STR(encoding)) return 2;
if (LP_ENCODING_IS_32BIT_STR(encoding)) return 5;
if (encoding == LP_EOF) return 1;
return 0;
}
@ -449,13 +471,22 @@ static inline uint32_t lpCurrentEncodedSizeBytes(unsigned char *p) {
* function if the current element is the EOF element at the end of the
* listpack, however, while this function is used to implement lpNext(),
* it does not return NULL when the EOF element is encountered. */
unsigned char *lpSkip(unsigned char *p) {
static inline unsigned char *lpSkip(unsigned char *p) {
unsigned long entrylen = lpCurrentEncodedSizeUnsafe(p);
entrylen += lpEncodeBacklen(NULL,entrylen);
entrylen += lpEncodeBacklenBytes(entrylen);
p += entrylen;
return p;
}
/* This is similar to lpNext() but avoids the inner call to lpBytes when you already know the listpack size. */
unsigned char *lpNextWithBytes(unsigned char *lp, unsigned char *p, const size_t lpbytes) {
assert(p);
p = lpSkip(p);
if (p[0] == LP_EOF) return NULL;
lpAssertValidEntry(lp, lpbytes, p);
return p;
}
/* If 'p' points to an element of the listpack, calling lpNext() will return
* the pointer to the next element (the one on the right), or NULL if 'p'
* already pointed to the last element of the listpack. */
@ -475,7 +506,7 @@ unsigned char *lpPrev(unsigned char *lp, unsigned char *p) {
if (p-lp == LP_HDR_SIZE) return NULL;
p--; /* Seek the first backlen byte of the last element. */
uint64_t prevlen = lpDecodeBacklen(p);
prevlen += lpEncodeBacklen(NULL,prevlen);
prevlen += lpEncodeBacklenBytes(prevlen);
p -= prevlen-1; /* Seek the first byte of the previous entry. */
lpAssertValidEntry(lp, lpBytes(lp), p);
return p;
@ -569,7 +600,7 @@ static inline unsigned char *lpGetWithSize(unsigned char *p, int64_t *count, uns
if (entry_size) *entry_size = LP_ENCODING_7BIT_UINT_ENTRY_SIZE;
} else if (LP_ENCODING_IS_6BIT_STR(p[0])) {
*count = LP_ENCODING_6BIT_STR_LEN(p);
if (entry_size) *entry_size = 1 + *count + lpEncodeBacklen(NULL, *count + 1);
if (entry_size) *entry_size = 1 + *count + lpEncodeBacklenBytes(*count + 1);
return p+1;
} else if (LP_ENCODING_IS_13BIT_INT(p[0])) {
uval = ((p[0]&0x1f)<<8) | p[1];
@ -611,11 +642,11 @@ static inline unsigned char *lpGetWithSize(unsigned char *p, int64_t *count, uns
if (entry_size) *entry_size = LP_ENCODING_64BIT_INT_ENTRY_SIZE;
} else if (LP_ENCODING_IS_12BIT_STR(p[0])) {
*count = LP_ENCODING_12BIT_STR_LEN(p);
if (entry_size) *entry_size = 2 + *count + lpEncodeBacklen(NULL, *count + 2);
if (entry_size) *entry_size = 2 + *count + lpEncodeBacklenBytes(*count + 2);
return p+2;
} else if (LP_ENCODING_IS_32BIT_STR(p[0])) {
*count = LP_ENCODING_32BIT_STR_LEN(p);
if (entry_size) *entry_size = 5 + *count + lpEncodeBacklen(NULL, *count + 5);
if (entry_size) *entry_size = 5 + *count + lpEncodeBacklenBytes(*count + 5);
return p+5;
} else {
uval = 12345678900000000ULL + p[0];
@ -647,8 +678,99 @@ static inline unsigned char *lpGetWithSize(unsigned char *p, int64_t *count, uns
}
}
/* Return the listpack element pointed by 'p'.
*
* The function has the same behaviour as lpGetWithSize when 'entry_size' is NULL,
* but avoids a lot of unecesarry branching performance penalties. */
static inline unsigned char *lpGetWithBuf(unsigned char *p, int64_t *count, unsigned char *intbuf) {
int64_t val;
uint64_t uval, negstart, negmax;
assert(p); /* assertion for valgrind (avoid NPD) */
const unsigned char encoding = p[0];
/* string encoding */
if (LP_ENCODING_IS_6BIT_STR(encoding)) {
*count = LP_ENCODING_6BIT_STR_LEN(p);
return p+1;
}
if (LP_ENCODING_IS_12BIT_STR(encoding)) {
*count = LP_ENCODING_12BIT_STR_LEN(p);
return p+2;
}
if (LP_ENCODING_IS_32BIT_STR(encoding)) {
*count = LP_ENCODING_32BIT_STR_LEN(p);
return p+5;
}
/* int encoding */
if (LP_ENCODING_IS_7BIT_UINT(encoding)) {
negstart = UINT64_MAX; /* 7 bit ints are always positive. */
negmax = 0;
uval = encoding & 0x7f;
} else if (LP_ENCODING_IS_13BIT_INT(encoding)) {
uval = ((encoding&0x1f)<<8) | p[1];
negstart = (uint64_t)1<<12;
negmax = 8191;
} else if (LP_ENCODING_IS_16BIT_INT(encoding)) {
uval = (uint64_t)p[1] |
(uint64_t)p[2]<<8;
negstart = (uint64_t)1<<15;
negmax = UINT16_MAX;
} else if (LP_ENCODING_IS_24BIT_INT(encoding)) {
uval = (uint64_t)p[1] |
(uint64_t)p[2]<<8 |
(uint64_t)p[3]<<16;
negstart = (uint64_t)1<<23;
negmax = UINT32_MAX>>8;
} else if (LP_ENCODING_IS_32BIT_INT(encoding)) {
uval = (uint64_t)p[1] |
(uint64_t)p[2]<<8 |
(uint64_t)p[3]<<16 |
(uint64_t)p[4]<<24;
negstart = (uint64_t)1<<31;
negmax = UINT32_MAX;
} else if (LP_ENCODING_IS_64BIT_INT(encoding)) {
uval = (uint64_t)p[1] |
(uint64_t)p[2]<<8 |
(uint64_t)p[3]<<16 |
(uint64_t)p[4]<<24 |
(uint64_t)p[5]<<32 |
(uint64_t)p[6]<<40 |
(uint64_t)p[7]<<48 |
(uint64_t)p[8]<<56;
negstart = (uint64_t)1<<63;
negmax = UINT64_MAX;
} else {
uval = 12345678900000000ULL + encoding;
negstart = UINT64_MAX;
negmax = 0;
}
/* We reach this code path only for integer encodings.
* Convert the unsigned value to the signed one using two's complement
* rule. */
if (uval >= negstart) {
/* This three steps conversion should avoid undefined behaviors
* in the unsigned -> signed conversion. */
uval = negmax-uval;
val = uval;
val = -val-1;
} else {
val = uval;
}
/* Return the string representation of the integer or the value itself
* depending on intbuf being NULL or not. */
if (intbuf) {
*count = ll2string((char*)intbuf,LP_INTBUF_SIZE,(long long)val);
return intbuf;
} else {
*count = val;
return NULL;
}
}
unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf) {
return lpGetWithSize(p, count, intbuf, NULL);
return lpGetWithBuf(p, count, intbuf);
}
/* This is just a wrapper to lpGet() that is able to get entry value directly.
@ -880,7 +1002,7 @@ unsigned char *lpInsert(unsigned char *lp, unsigned char *elestr, unsigned char
uint32_t replaced_len = 0;
if (where == LP_REPLACE) {
replaced_len = lpCurrentEncodedSizeUnsafe(p);
replaced_len += lpEncodeBacklen(NULL,replaced_len);
replaced_len += lpEncodeBacklenBytes(replaced_len);
ASSERT_INTEGRITY_LEN(lp, p, replaced_len);
}
@ -1420,7 +1542,7 @@ size_t lpBytes(unsigned char *lp) {
size_t lpEntrySizeInteger(long long lval) {
uint64_t enclen;
lpEncodeIntegerGetType(lval, NULL, &enclen);
unsigned long backlen = lpEncodeBacklen(NULL, enclen);
unsigned long backlen = lpEncodeBacklenBytes(enclen);
return enclen + backlen;
}
@ -1487,6 +1609,7 @@ unsigned char *lpValidateFirst(unsigned char *lp) {
/* Validate the integrity of a single listpack entry and move to the next one.
* The input argument 'pp' is a reference to the current record and is advanced on exit.
* the data pointed to by 'lp' will not be modified by the function.
* Returns 1 if valid, 0 if invalid. */
int lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes) {
#define OUT_OF_RANGE(p) ( \
@ -1506,7 +1629,7 @@ int lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes) {
}
/* check that we can read the encoded size */
uint32_t lenbytes = lpCurrentEncodedSizeBytes(p);
uint32_t lenbytes = lpCurrentEncodedSizeBytes(p[0]);
if (!lenbytes)
return 0;
@ -1516,7 +1639,7 @@ int lpValidateNext(unsigned char *lp, unsigned char **pp, size_t lpbytes) {
/* get the entry length and encoded backlen. */
unsigned long entrylen = lpCurrentEncodedSizeUnsafe(p);
unsigned long encodedBacklen = lpEncodeBacklen(NULL,entrylen);
unsigned long encodedBacklen = lpEncodeBacklenBytes(entrylen);
entrylen += encodedBacklen;
/* make sure the entry doesn't reach outside the edge of the listpack */
@ -1859,9 +1982,9 @@ void lpRepr(unsigned char *lp) {
p = lpFirst(lp);
while(p) {
uint32_t encoded_size_bytes = lpCurrentEncodedSizeBytes(p);
uint32_t encoded_size_bytes = lpCurrentEncodedSizeBytes(p[0]);
uint32_t encoded_size = lpCurrentEncodedSizeUnsafe(p);
unsigned long back_len = lpEncodeBacklen(NULL, encoded_size);
unsigned long back_len = lpEncodeBacklenBytes(encoded_size);
printf(
"{\n"
"\taddr: 0x%08lx,\n"
@ -3002,7 +3125,7 @@ int listpackTest(int argc, char *argv[], int flags) {
for (i = 0; i < iteration; i++) {
lp = lpNew(0);
ref = listCreate();
listSetFreeMethod(ref,(void (*)(void*))sdsfree);
listSetFreeMethod(ref, sdsfreegeneric);
len = rand() % 256;
/* Create lists */

View File

@ -35,6 +35,7 @@ typedef struct {
unsigned char *lpNew(size_t capacity);
void lpFree(unsigned char *lp);
void lpFreeGeneric(void *lp);
unsigned char* lpShrinkToFit(unsigned char *lp);
unsigned char *lpInsertString(unsigned char *lp, unsigned char *s, uint32_t slen,
unsigned char *p, int where, unsigned char **newp);
@ -65,6 +66,7 @@ unsigned char *lpFindCb(unsigned char *lp, unsigned char *p, void *user, lpCmp c
unsigned char *lpFirst(unsigned char *lp);
unsigned char *lpLast(unsigned char *lp);
unsigned char *lpNext(unsigned char *lp, unsigned char *p);
unsigned char *lpNextWithBytes(unsigned char *lp, unsigned char *p, const size_t lpbytes);
unsigned char *lpPrev(unsigned char *lp, unsigned char *p);
size_t lpBytes(unsigned char *lp);
size_t lpEntrySizeInteger(long long lval);

View File

@ -437,7 +437,13 @@ typedef int (*RedisModuleConfigApplyFunc)(RedisModuleCtx *ctx, void *privdata, R
/* Struct representing a module config. These are stored in a list in the module struct */
struct ModuleConfig {
sds name; /* Name of config without the module name appended to the front */
sds name; /* Fullname of the config (as it appears in the config file) */
sds alias; /* Optional alias for the configuration. NULL if none exists */
int unprefixedFlag; /* Indicates if the REDISMODULE_CONFIG_UNPREFIXED flag was set.
* If the configuration name was prefixed,during get_fn/set_fn
* callbacks, it should be reported without the prefix */
void *privdata; /* Optional data passed into the module config callbacks */
union get_fn { /* The get callback specified by the module */
RedisModuleConfigGetStringFunc get_string;
@ -658,7 +664,7 @@ void moduleReleaseTempClient(client *c) {
c->bufpos = 0;
c->flags = CLIENT_MODULE;
c->user = NULL; /* Root user */
c->cmd = c->lastcmd = c->realcmd = NULL;
c->cmd = c->lastcmd = c->realcmd = c->iolookedcmd = NULL;
if (c->bstate.async_rm_call_handle) {
RedisModuleAsyncRMCallPromise *promise = c->bstate.async_rm_call_handle;
promise->c = NULL; /* Remove the client from the promise so it will no longer be possible to abort it. */
@ -983,7 +989,7 @@ int moduleGetCommandChannelsViaAPI(struct redisCommand *cmd, robj **argv, int ar
*
* These functions are used to implement custom Redis commands.
*
* For examples, see https://redis.io/topics/modules-intro.
* For examples, see https://redis.io/docs/latest/develop/reference/modules/.
* -------------------------------------------------------------------------- */
/* Return non-zero if a module command, that was declared with the
@ -1197,7 +1203,7 @@ RedisModuleCommand *moduleCreateCommandProxy(struct RedisModule *module, sds dec
* from the same input arguments and key values.
* Starting from Redis 7.0 this flag has been deprecated.
* Declaring a command as "random" can be done using
* command tips, see https://redis.io/topics/command-tips.
* command tips, see https://redis.io/docs/latest/develop/reference/command-tips/.
* * **"allow-stale"**: The command is allowed to run on slaves that don't
* serve stale data. Don't use if you don't know what
* this means.
@ -1270,8 +1276,11 @@ int RM_CreateCommand(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc c
RedisModuleCommand *cp = moduleCreateCommandProxy(ctx->module, declared_name, sdsdup(declared_name), cmdfunc, flags, firstkey, lastkey, keystep);
cp->rediscmd->arity = cmdfunc ? -1 : -2; /* Default value, can be changed later via dedicated API */
pauseAllIOThreads();
serverAssert(dictAdd(server.commands, sdsdup(declared_name), cp->rediscmd) == DICT_OK);
serverAssert(dictAdd(server.orig_commands, sdsdup(declared_name), cp->rediscmd) == DICT_OK);
resumeAllIOThreads();
cp->rediscmd->id = ACLGetCommandID(declared_name); /* ID used for ACL. */
return REDISMODULE_OK;
}
@ -1587,7 +1596,7 @@ int RM_SetCommandACLCategories(RedisModuleCommand *command, const char *aclflags
* both strings set to NULL.
*
* - `tips`: A string of space-separated tips regarding this command, meant for
* clients and proxies. See https://redis.io/topics/command-tips.
* clients and proxies. See https://redis.io/docs/latest/develop/reference/command-tips/.
*
* - `arity`: Number of arguments, including the command name itself. A positive
* number specifies an exact number of arguments and a negative number
@ -2253,12 +2262,16 @@ int moduleIsModuleCommand(void *module_handle, struct redisCommand *cmd) {
* -------------------------------------------------------------------------- */
int moduleListConfigMatch(void *config, void *name) {
return strcasecmp(((ModuleConfig *) config)->name, (char *) name) == 0;
ModuleConfig *mc = (ModuleConfig *) config;
/* Compare the provided name with the config's name and alias if it exists */
return strcasecmp(mc->name, (char *) name) == 0 ||
((mc->alias) && strcasecmp(mc->alias, (char *) name) == 0);
}
void moduleListFree(void *config) {
ModuleConfig *module_config = (ModuleConfig *) config;
sdsfree(module_config->name);
sdsfree(module_config->alias);
zfree(config);
}
@ -4171,15 +4184,7 @@ int RM_KeyType(RedisModuleKey *key) {
* If the key pointer is NULL or the key is empty, zero is returned. */
size_t RM_ValueLength(RedisModuleKey *key) {
if (key == NULL || key->value == NULL) return 0;
switch(key->value->type) {
case OBJ_STRING: return stringObjectLen(key->value);
case OBJ_LIST: return listTypeLength(key->value);
case OBJ_SET: return setTypeSize(key->value);
case OBJ_ZSET: return zsetLength(key->value);
case OBJ_HASH: return hashTypeLength(key->value, 0); /* OPEN: To subtract expired fields? */
case OBJ_STREAM: return streamLength(key->value);
default: return 0;
}
return getObjectLength(key->value);
}
/* If the key is open for writing, remove it, and setup the key to
@ -5357,6 +5362,11 @@ int RM_HashSet(RedisModuleKey *key, int flags, ...) {
* reports if the field exists or not and expects an integer pointer
* as the second element of each pair.
*
* REDISMODULE_HASH_EXPIRE_TIME: retrieves the expiration time of a field in the hash.
* The function expects a `mstime_t` pointer as the second element of each pair.
* If the field does not exist or has no expiration, the value is set to
* `REDISMODULE_NO_EXPIRE`. This flag must not be used with `REDISMODULE_HASH_EXISTS`.
*
* Example of REDISMODULE_HASH_CFIELDS:
*
* RedisModuleString *username, *hashedpass;
@ -5365,7 +5375,12 @@ int RM_HashSet(RedisModuleKey *key, int flags, ...) {
* Example of REDISMODULE_HASH_EXISTS:
*
* int exists;
* RedisModule_HashGet(mykey,REDISMODULE_HASH_EXISTS,argv[1],&exists,NULL);
* RedisModule_HashGet(mykey,REDISMODULE_HASH_EXISTS,"username",&exists,NULL);
*
* Example of REDISMODULE_HASH_EXPIRE_TIME:
*
* mstime_t hpExpireTime;
* RedisModule_HashGet(mykey,REDISMODULE_HASH_EXPIRE_TIME,"hp",&hpExpireTime,NULL);
*
* The function returns REDISMODULE_OK on success and REDISMODULE_ERR if
* the key is not a hash value.
@ -5383,6 +5398,10 @@ int RM_HashGet(RedisModuleKey *key, int flags, ...) {
if (key->mode & REDISMODULE_OPEN_KEY_ACCESS_EXPIRED)
hfeFlags = HFE_LAZY_ACCESS_EXPIRED; /* allow read also expired fields */
/* Verify flag HASH_EXISTS is not set together with HASH_EXPIRE_TIME */
if ((flags & REDISMODULE_HASH_EXISTS) && (flags & REDISMODULE_HASH_EXPIRE_TIME))
return REDISMODULE_ERR;
va_start(ap, flags);
while(1) {
RedisModuleString *field, **valueptr;
@ -5405,11 +5424,22 @@ int RM_HashGet(RedisModuleKey *key, int flags, ...) {
} else {
*existsptr = 0;
}
} else if (flags & REDISMODULE_HASH_EXPIRE_TIME) {
mstime_t *expireptr = va_arg(ap,mstime_t*);
*expireptr = REDISMODULE_NO_EXPIRE;
if (key->value) {
uint64_t expireTime = 0;
/* As an opt, avoid fetching value, only expire time */
int res = hashTypeGetValueObject(key->db, key->value, field->ptr,
hfeFlags, NULL, &expireTime, NULL);
/* If field has expiration time */
if (res && expireTime != 0) *expireptr = expireTime;
}
} else {
valueptr = va_arg(ap,RedisModuleString**);
if (key->value) {
*valueptr = hashTypeGetValueObject(key->db, key->value, field->ptr,
hfeFlags, NULL);
hashTypeGetValueObject(key->db, key->value, field->ptr,
hfeFlags, valueptr, NULL, NULL);
if (*valueptr) {
robj *decoded = getDecodedObject(*valueptr);
@ -5430,10 +5460,27 @@ int RM_HashGet(RedisModuleKey *key, int flags, ...) {
return REDISMODULE_OK;
}
/**
* Retrieves the minimum expiration time of fields in a hash.
*
* Return:
* - The minimum expiration time (in milliseconds) of the hash fields if at
* least one field has an expiration set.
* - REDISMODULE_NO_EXPIRE if no fields have an expiration set or if the key
* is not a hash.
*/
mstime_t RM_HashFieldMinExpire(RedisModuleKey *key) {
if ((!key->value) || (key->value->type != OBJ_HASH))
return REDISMODULE_NO_EXPIRE;
mstime_t min = hashTypeGetMinExpire(key->value, 1);
return (min == EB_EXPIRE_TIME_INVALID) ? REDISMODULE_NO_EXPIRE : min;
}
/* --------------------------------------------------------------------------
* ## Key API for Stream type
*
* For an introduction to streams, see https://redis.io/topics/streams-intro.
* For an introduction to streams, see https://redis.io/docs/latest/develop/data-types/streams/.
*
* The type RedisModuleStreamID, which is used in stream functions, is a struct
* with two 64-bit fields and is defined as
@ -6306,7 +6353,7 @@ fmterr:
* // Do something with myval.
* }
*
* This API is documented here: https://redis.io/topics/modules-intro
* This API is documented here: https://redis.io/docs/latest/develop/reference/modules/
*/
RedisModuleCallReply *RM_Call(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) {
client *c = NULL;
@ -6816,7 +6863,7 @@ robj *moduleTypeDupOrReply(client *c, robj *fromkey, robj *tokey, int todb, robj
/* Register a new data type exported by the module. The parameters are the
* following. Please for in depth documentation check the modules API
* documentation, especially https://redis.io/topics/modules-native-types.
* documentation, especially https://redis.io/docs/latest/develop/reference/modules/modules-native-types/.
*
* * **name**: A 9 characters data type name that MUST be unique in the Redis
* Modules ecosystem. Be creative... and there will be no collisions. Use
@ -7705,7 +7752,7 @@ void RM_LatencyAddSample(const char *event, mstime_t latency) {
* ## Blocking clients from modules
*
* For a guide about blocking commands in modules, see
* https://redis.io/topics/modules-blocking-ops.
* https://redis.io/docs/latest/develop/reference/modules/modules-blocking-ops/.
* -------------------------------------------------------------------------- */
/* Returns 1 if the client already in the moduleUnblocked list, 0 otherwise. */
@ -8717,7 +8764,7 @@ void moduleReleaseGIL(void) {
* runs is dangerous and discouraged. In order to react to key space events with
* write actions, please refer to `RM_AddPostNotificationJob`.
*
* See https://redis.io/topics/notifications for more information.
* See https://redis.io/docs/latest/develop/use/keyspace-notifications/ for more information.
*/
int RM_SubscribeToKeyspaceEvents(RedisModuleCtx *ctx, int types, RedisModuleNotificationFunc callback) {
RedisModuleKeyspaceSubscriber *sub = zmalloc(sizeof(*sub));
@ -9678,6 +9725,12 @@ RedisModuleString *RM_GetModuleUserACLString(RedisModuleUser *user) {
* The returned string must be released with RedisModule_FreeString() or by
* enabling automatic memory management. */
RedisModuleString *RM_GetCurrentUserName(RedisModuleCtx *ctx) {
/* Sometimes, the user isn't passed along the call stack or isn't
* even set, so we need to check for the members to avoid crashes. */
if (ctx->client == NULL || ctx->client->user == NULL || ctx->client->user->name == NULL) {
return NULL;
}
return RM_CreateString(ctx,ctx->client->user->name,sdslen(ctx->client->user->name));
}
@ -9766,6 +9819,45 @@ int RM_ACLCheckKeyPermissions(RedisModuleUser *user, RedisModuleString *key, int
return REDISMODULE_OK;
}
/* Check if the user can access keys matching the given key prefix according to the ACLs
* attached to the user and the flags representing key access. The flags are the same that
* are used in the keyspec for logical operations. These flags are documented in
* RedisModule_SetCommandInfo as the REDISMODULE_CMD_KEY_ACCESS,
* REDISMODULE_CMD_KEY_UPDATE, REDISMODULE_CMD_KEY_INSERT, and REDISMODULE_CMD_KEY_DELETE flags.
*
* If no flags are supplied, the user is still required to have some access to keys matching
* the prefix for this command to return successfully.
*
* If the user is able to access keys matching the prefix, then REDISMODULE_OK is returned.
* Otherwise, REDISMODULE_ERR is returned and errno is set to one of the following values:
*
* * EINVAL: The provided flags are invalid.
* * EACCES: The user does not have permission to access keys matching the prefix.
*/
int RM_ACLCheckKeyPrefixPermissions(RedisModuleUser *user, RedisModuleString *prefix, int flags) {
const int allow_mask = (REDISMODULE_CMD_KEY_ACCESS
| REDISMODULE_CMD_KEY_INSERT
| REDISMODULE_CMD_KEY_DELETE
| REDISMODULE_CMD_KEY_UPDATE);
if ((flags & allow_mask) != flags) {
errno = EINVAL;
return REDISMODULE_ERR;
}
int keyspec_flags = moduleConvertKeySpecsFlags(flags, 0);
/* Add the prefix flag to the keyspec flags */
keyspec_flags |= CMD_KEY_PREFIX;
if (ACLUserCheckKeyPerm(user->user, prefix->ptr, sdslen(prefix->ptr), keyspec_flags) != ACL_OK) {
errno = EACCES;
return REDISMODULE_ERR;
}
return REDISMODULE_OK;
}
/* Check if the pubsub channel can be accessed by the user based off of the given
* access flags. See RM_ChannelAtPosWithFlags for more information about the
* possible flags that can be passed in.
@ -10465,7 +10557,7 @@ RedisModuleServerInfoData *RM_GetServerInfo(RedisModuleCtx *ctx, const char *sec
* context instead of passing NULL. */
void RM_FreeServerInfo(RedisModuleCtx *ctx, RedisModuleServerInfoData *data) {
if (ctx != NULL) autoMemoryFreed(ctx,REDISMODULE_AM_INFO,data);
raxFreeWithCallback(data->rax, (void(*)(void*))sdsfree);
raxFreeWithCallback(data->rax, sdsfreegeneric);
zfree(data);
}
@ -10816,6 +10908,10 @@ void moduleCallCommandFilters(client *c) {
f->callback(&filter);
}
/* If the filter sets a new command, including command or subcommand,
* the command looked up in IO threads will be invalid. */
c->iolookedcmd = NULL;
c->argv = filter.argv;
c->argv_len = filter.argv_len;
c->argc = filter.argc;
@ -12092,10 +12188,9 @@ void moduleRemoveConfigs(RedisModule *module) {
listRewind(module->module_configs, &li);
while ((ln = listNext(&li))) {
ModuleConfig *config = listNodeValue(ln);
sds module_name = sdsnew(module->name);
sds full_name = sdscat(sdscat(module_name, "."), config->name); /* ModuleName.ModuleConfig */
removeConfig(full_name);
sdsfree(full_name);
removeConfig(config->name);
if (config->alias)
removeConfig(config->alias);
}
}
@ -12134,6 +12229,12 @@ void moduleLoadFromQueue(void) {
listDelNode(server.loadmodule_queue, ln);
}
if (dictSize(server.module_configs_queue)) {
serverLog(LL_WARNING, "Unresolved Configuration(s) Detected:");
dictIterator *di = dictGetIterator(server.module_configs_queue);
dictEntry *de;
while ((de = dictNext(di)) != NULL) {
serverLog(LL_WARNING, ">>> '%s %s'", (char *)dictGetKey(de), (char *)dictGetVal(de));
}
serverLog(LL_WARNING, "Module Configuration detected without loadmodule directive or no ApplyConfig call: aborting");
exit(1);
}
@ -12227,6 +12328,7 @@ int moduleFreeCommand(struct RedisModule *module, struct redisCommand *cmd) {
}
void moduleUnregisterCommands(struct RedisModule *module) {
pauseAllIOThreads();
/* Unregister all the commands registered by this module. */
dictIterator *di = dictGetSafeIterator(server.commands);
dictEntry *de;
@ -12241,6 +12343,7 @@ void moduleUnregisterCommands(struct RedisModule *module) {
zfree(cmd);
}
dictReleaseIterator(di);
resumeAllIOThreads();
}
/* We parse argv to add sds "NAME VALUE" pairs to the server.module_configs_queue list of configs.
@ -12373,7 +12476,7 @@ int moduleLoad(const char *path, void **module_argv, int module_argc, int is_loa
}
if (post_load_err) {
moduleUnload(ctx.module->name, NULL);
serverAssert(moduleUnload(ctx.module->name, NULL, 1) == C_OK);
moduleFreeContext(&ctx);
return C_ERR;
}
@ -12389,14 +12492,17 @@ int moduleLoad(const char *path, void **module_argv, int module_argc, int is_loa
/* Unload the module registered with the specified name. On success
* C_OK is returned, otherwise C_ERR is returned and errmsg is set
* with an appropriate message. */
int moduleUnload(sds name, const char **errmsg) {
* with an appropriate message.
* Only forcefully unload this module, passing forced_unload != 0,
* if it is certain that it has not yet been in use (e.g., immediate
* unload on failed load). */
int moduleUnload(sds name, const char **errmsg, int forced_unload) {
struct RedisModule *module = dictFetchValue(modules,name);
if (module == NULL) {
*errmsg = "no such module with that name";
return C_ERR;
} else if (listLength(module->types)) {
} else if (listLength(module->types) && !forced_unload) {
*errmsg = "the module exports one or more module-side data "
"types, can't unload";
return C_ERR;
@ -12581,7 +12687,8 @@ int moduleVerifyConfigFlags(unsigned int flags, configType type) {
| REDISMODULE_CONFIG_PROTECTED
| REDISMODULE_CONFIG_DENY_LOADING
| REDISMODULE_CONFIG_BITFLAGS
| REDISMODULE_CONFIG_MEMORY))) {
| REDISMODULE_CONFIG_MEMORY
| REDISMODULE_CONFIG_UNPREFIXED))) {
serverLogRaw(LL_WARNING, "Invalid flag(s) for configuration");
return REDISMODULE_ERR;
}
@ -12618,6 +12725,54 @@ int moduleVerifyResourceName(const char *name) {
return REDISMODULE_OK;
}
/* Verify unprefixed name config might be a single "<name>" or in the form
* "<name>|<alias>". Unlike moduleVerifyResourceName(), unprefixed name config
* allows a single dot in the name or alias.
*
* delim - Updates to point to "|" if it exists, NULL otherwise.
*/
int moduleVerifyUnprefixedName(const char *nameAlias, const char **delim) {
if (nameAlias[0] == '\0')
return REDISMODULE_ERR;
*delim = NULL;
int dot_count = 0, lname = 0;
for (size_t i = 0; nameAlias[i] != '\0'; i++) {
char ch = nameAlias[i];
if (((*delim) == NULL) && (ch == '|')) {
/* Handle single separator between name and alias */
if (!lname) {
serverLog(LL_WARNING, "Module configuration name is empty: %s", nameAlias);
return REDISMODULE_ERR;
}
*delim = &nameAlias[i];
dot_count = lname = 0;
} else if ( (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
(ch >= '0' && ch <= '9') || (ch == '_') || (ch == '-') )
{
++lname;
} else if (ch == '.') {
/* Allow only one dot per section (name or alias) */
if (++dot_count > 1) {
serverLog(LL_WARNING, "Invalid character sequence in Module configuration name or alias: %s", nameAlias);
return REDISMODULE_ERR;
}
} else {
serverLog(LL_WARNING, "Invalid character %c in Module configuration name or alias %s.", ch, nameAlias);
return REDISMODULE_ERR;
}
}
if (!lname) {
serverLog(LL_WARNING, "Module configuration name or alias is empty : %s", nameAlias);
return REDISMODULE_ERR;
}
return REDISMODULE_OK;
}
/* This is a series of set functions for each type that act as dispatchers for
* config.c to call module set callbacks. */
#define CONFIG_ERR_SIZE 256
@ -12630,9 +12785,24 @@ static void propagateErrorString(RedisModuleString *err_in, const char **err) {
}
}
/* If configuration was originally registered with indication to prefix the name,
* return the name without the prefix by skipping prefix "<MODULE-NAME>.".
* Otherwise, return the stored name as is. */
static char *getRegisteredConfigName(ModuleConfig *config) {
if (config->unprefixedFlag)
return config->name;
/* For prefixed configuration, find the '.' indicating the end of the prefix */
char *endOfPrefix = strchr(config->name, '.');
serverAssert(endOfPrefix != NULL);
return endOfPrefix + 1;
}
int setModuleBoolConfig(ModuleConfig *config, int val, const char **err) {
RedisModuleString *error = NULL;
int return_code = config->set_fn.set_bool(config->name, val, config->privdata, &error);
char *rname = getRegisteredConfigName(config);
int return_code = config->set_fn.set_bool(rname, val, config->privdata, &error);
propagateErrorString(error, err);
return return_code == REDISMODULE_OK ? 1 : 0;
}
@ -12640,7 +12810,9 @@ int setModuleBoolConfig(ModuleConfig *config, int val, const char **err) {
int setModuleStringConfig(ModuleConfig *config, sds strval, const char **err) {
RedisModuleString *error = NULL;
RedisModuleString *new = createStringObject(strval, sdslen(strval));
int return_code = config->set_fn.set_string(config->name, new, config->privdata, &error);
char *rname = getRegisteredConfigName(config);
int return_code = config->set_fn.set_string(rname, new, config->privdata, &error);
propagateErrorString(error, err);
decrRefCount(new);
return return_code == REDISMODULE_OK ? 1 : 0;
@ -12655,7 +12827,8 @@ int setModuleEnumConfig(ModuleConfig *config, int val, const char **err) {
int setModuleNumericConfig(ModuleConfig *config, long long val, const char **err) {
RedisModuleString *error = NULL;
int return_code = config->set_fn.set_numeric(config->name, val, config->privdata, &error);
char *rname = getRegisteredConfigName(config);
int return_code = config->set_fn.set_numeric(rname, val, config->privdata, &error);
propagateErrorString(error, err);
return return_code == REDISMODULE_OK ? 1 : 0;
}
@ -12663,20 +12836,24 @@ int setModuleNumericConfig(ModuleConfig *config, long long val, const char **err
/* This is a series of get functions for each type that act as dispatchers for
* config.c to call module set callbacks. */
int getModuleBoolConfig(ModuleConfig *module_config) {
return module_config->get_fn.get_bool(module_config->name, module_config->privdata);
char *rname = getRegisteredConfigName(module_config);
return module_config->get_fn.get_bool(rname, module_config->privdata);
}
sds getModuleStringConfig(ModuleConfig *module_config) {
RedisModuleString *val = module_config->get_fn.get_string(module_config->name, module_config->privdata);
char *rname = getRegisteredConfigName(module_config);
RedisModuleString *val = module_config->get_fn.get_string(rname, module_config->privdata);
return val ? sdsdup(val->ptr) : NULL;
}
int getModuleEnumConfig(ModuleConfig *module_config) {
return module_config->get_fn.get_enum(module_config->name, module_config->privdata);
char *rname = getRegisteredConfigName(module_config);
return module_config->get_fn.get_enum(rname, module_config->privdata);
}
long long getModuleNumericConfig(ModuleConfig *module_config) {
return module_config->get_fn.get_numeric(module_config->name, module_config->privdata);
char *rname = getRegisteredConfigName(module_config);
return module_config->get_fn.get_numeric(rname, module_config->privdata);
}
/* This function takes a module and a list of configs stored as sds NAME VALUE pairs.
@ -12688,25 +12865,26 @@ int loadModuleConfigs(RedisModule *module) {
listRewind(module->module_configs, &li);
while ((ln = listNext(&li))) {
ModuleConfig *module_config = listNodeValue(ln);
sds config_name = sdscatfmt(sdsempty(), "%s.%s", module->name, module_config->name);
dictEntry *config_argument = dictFind(server.module_configs_queue, config_name);
if (config_argument) {
if (!performModuleConfigSetFromName(dictGetKey(config_argument), dictGetVal(config_argument), &err)) {
serverLog(LL_WARNING, "Issue during loading of configuration %s : %s", (sds) dictGetKey(config_argument), err);
sdsfree(config_name);
dictEntry *de = dictUnlink(server.module_configs_queue, module_config->name);
if ((!de) && (module_config->alias))
de = dictUnlink(server.module_configs_queue, module_config->alias);
/* If found in the queue, set the value. Otherwise, set the default value. */
if (de) {
if (!performModuleConfigSetFromName(dictGetKey(de), dictGetVal(de), &err)) {
serverLog(LL_WARNING, "Issue during loading of configuration %s : %s", (sds) dictGetKey(de), err);
dictFreeUnlinkedEntry(server.module_configs_queue, de);
dictEmpty(server.module_configs_queue, NULL);
return REDISMODULE_ERR;
}
dictFreeUnlinkedEntry(server.module_configs_queue, de);
} else {
if (!performModuleConfigSetDefaultFromName(config_name, &err)) {
if (!performModuleConfigSetDefaultFromName(module_config->name, &err)) {
serverLog(LL_WARNING, "Issue attempting to set default value of configuration %s : %s", module_config->name, err);
sdsfree(config_name);
dictEmpty(server.module_configs_queue, NULL);
return REDISMODULE_ERR;
}
}
dictDelete(server.module_configs_queue, config_name);
sdsfree(config_name);
}
module->configs_initialized = 1;
return REDISMODULE_OK;
@ -12756,26 +12934,93 @@ int moduleConfigApplyConfig(list *module_configs, const char **err, const char *
* ## Module Configurations API
* -------------------------------------------------------------------------- */
/* Create a module config object. */
ModuleConfig *createModuleConfig(const char *name, RedisModuleConfigApplyFunc apply_fn, void *privdata, RedisModule *module) {
/* Resolve config name and create a module config object */
ModuleConfig *createModuleConfig(const char *name, RedisModuleConfigApplyFunc apply_fn,
void *privdata, RedisModule *module, unsigned int flags)
{
sds cname, alias = NULL;
/* Determine the configuration name:
* - If the unprefixed flag is set, the "<MODULE-NAME>." prefix is omitted.
* - An optional alias can be specified using "<NAME>|<ALIAS>".
*
* Examples:
* - Unprefixed: "bf.initial_size" or "bf-initial-size|bf.initial_size".
* - Prefixed: "initial_size" becomes "<MODULE-NAME>.initial_size".
*/
if (flags & REDISMODULE_CONFIG_UNPREFIXED) {
const char *delim = strchr(name, '|');
cname = sdsnew(name);
if (delim) { /* Handle "<NAME>|<ALIAS>" format */
sdssubstr(cname, 0, delim - name);
alias = sdsnew(delim + 1);
}
} else {
/* Add the module name prefix */
cname = sdscatfmt(sdsempty(), "%s.%s", module->name, name);
}
ModuleConfig *new_config = zmalloc(sizeof(ModuleConfig));
new_config->name = sdsnew(name);
new_config->unprefixedFlag = flags & REDISMODULE_CONFIG_UNPREFIXED;
new_config->name = cname;
new_config->alias = alias;
new_config->apply_fn = apply_fn;
new_config->privdata = privdata;
new_config->module = module;
return new_config;
}
/* Verify the configuration name and check for duplicates.
*
* - If the configuration is flagged as unprefixed, it checks for duplicate
* names and optional aliases in the format <NAME>|<ALIAS>.
* - If the configuration is prefixed, it ensures the name is unique with
* the module name prepended (<MODULE_NAME>.<NAME>).
*/
int moduleConfigValidityCheck(RedisModule *module, const char *name, unsigned int flags, configType type) {
if (!module->onload) {
errno = EBUSY;
return REDISMODULE_ERR;
}
if (moduleVerifyConfigFlags(flags, type) || moduleVerifyResourceName(name)) {
if (moduleVerifyConfigFlags(flags, type)) {
errno = EINVAL;
return REDISMODULE_ERR;
}
if (isModuleConfigNameRegistered(module, name)) {
int isdup = 0;
if (flags & REDISMODULE_CONFIG_UNPREFIXED) {
const char *delim = NULL; /* Pointer to the '|' delimiter in <NAME>|<ALIAS> */
if (moduleVerifyUnprefixedName(name, &delim)){
errno = EINVAL;
return REDISMODULE_ERR;
}
if (delim) {
/* Temporary split the "<NAME>|<ALIAS>" for the check */
int count;
sds *ar = sdssplitlen(name, strlen(name), "|", 1, &count);
serverAssert(count == 2); /* Already validated */
isdup = configExists(ar[0]) ||
configExists(ar[1]) ||
(sdscmp(ar[0], ar[1]) == 0);
sdsfreesplitres(ar, count);
} else {
sds _name = sdsnew(name);
isdup = configExists(_name);
sdsfree(_name);
}
} else {
if (moduleVerifyResourceName(name)) {
errno = EINVAL;
return REDISMODULE_ERR;
}
sds fullname = sdscatfmt(sdsempty(), "%s.%s", module->name, name);
isdup = configExists(fullname);
sdsfree(fullname);
}
if (isdup) {
serverLog(LL_WARNING, "Configuration by the name: %s already registered", name);
errno = EALREADY;
return REDISMODULE_ERR;
@ -12885,12 +13130,14 @@ int RM_RegisterStringConfig(RedisModuleCtx *ctx, const char *name, const char *d
if (moduleConfigValidityCheck(module, name, flags, NUMERIC_CONFIG)) {
return REDISMODULE_ERR;
}
ModuleConfig *new_config = createModuleConfig(name, applyfn, privdata, module);
new_config->get_fn.get_string = getfn;
new_config->set_fn.set_string = setfn;
listAddNodeTail(module->module_configs, new_config);
flags = maskModuleConfigFlags(flags);
addModuleStringConfig(module->name, name, flags, new_config, default_val ? sdsnew(default_val) : NULL);
ModuleConfig *mc = createModuleConfig(name, applyfn, privdata, module, flags);
mc->get_fn.get_string = getfn;
mc->set_fn.set_string = setfn;
listAddNodeTail(module->module_configs, mc);
unsigned int cflags = maskModuleConfigFlags(flags);
addModuleStringConfig(sdsdup(mc->name), (mc->alias) ? sdsdup(mc->alias) : NULL,
cflags, mc, default_val ? sdsnew(default_val) : NULL);
return REDISMODULE_OK;
}
@ -12902,12 +13149,13 @@ int RM_RegisterBoolConfig(RedisModuleCtx *ctx, const char *name, int default_val
if (moduleConfigValidityCheck(module, name, flags, BOOL_CONFIG)) {
return REDISMODULE_ERR;
}
ModuleConfig *new_config = createModuleConfig(name, applyfn, privdata, module);
new_config->get_fn.get_bool = getfn;
new_config->set_fn.set_bool = setfn;
listAddNodeTail(module->module_configs, new_config);
flags = maskModuleConfigFlags(flags);
addModuleBoolConfig(module->name, name, flags, new_config, default_val);
ModuleConfig *mc = createModuleConfig(name, applyfn, privdata, module, flags);
mc->get_fn.get_bool = getfn;
mc->set_fn.set_bool = setfn;
listAddNodeTail(module->module_configs, mc);
unsigned int cflags = maskModuleConfigFlags(flags);
addModuleBoolConfig(sdsdup(mc->name), (mc->alias) ? sdsdup(mc->alias) : NULL,
cflags, mc, default_val);
return REDISMODULE_OK;
}
@ -12945,9 +13193,9 @@ int RM_RegisterEnumConfig(RedisModuleCtx *ctx, const char *name, int default_val
if (moduleConfigValidityCheck(module, name, flags, ENUM_CONFIG)) {
return REDISMODULE_ERR;
}
ModuleConfig *new_config = createModuleConfig(name, applyfn, privdata, module);
new_config->get_fn.get_enum = getfn;
new_config->set_fn.set_enum = setfn;
ModuleConfig *mc = createModuleConfig(name, applyfn, privdata, module, flags);
mc->get_fn.get_enum = getfn;
mc->set_fn.set_enum = setfn;
configEnum *enum_vals = zmalloc((num_enum_vals + 1) * sizeof(configEnum));
for (int i = 0; i < num_enum_vals; i++) {
enum_vals[i].name = zstrdup(enum_values[i]);
@ -12955,9 +13203,11 @@ int RM_RegisterEnumConfig(RedisModuleCtx *ctx, const char *name, int default_val
}
enum_vals[num_enum_vals].name = NULL;
enum_vals[num_enum_vals].val = 0;
listAddNodeTail(module->module_configs, new_config);
flags = maskModuleConfigFlags(flags) | maskModuleEnumConfigFlags(flags);
addModuleEnumConfig(module->name, name, flags, new_config, default_val, enum_vals);
listAddNodeTail(module->module_configs, mc);
unsigned int cflags = maskModuleConfigFlags(flags) | maskModuleEnumConfigFlags(flags);
addModuleEnumConfig(sdsdup(mc->name), (mc->alias) ? sdsdup(mc->alias) : NULL,
cflags, mc, default_val, enum_vals, num_enum_vals);
return REDISMODULE_OK;
}
@ -12970,13 +13220,15 @@ int RM_RegisterNumericConfig(RedisModuleCtx *ctx, const char *name, long long de
if (moduleConfigValidityCheck(module, name, flags, NUMERIC_CONFIG)) {
return REDISMODULE_ERR;
}
ModuleConfig *new_config = createModuleConfig(name, applyfn, privdata, module);
new_config->get_fn.get_numeric = getfn;
new_config->set_fn.set_numeric = setfn;
listAddNodeTail(module->module_configs, new_config);
ModuleConfig *mc = createModuleConfig(name, applyfn, privdata, module, flags);
mc->get_fn.get_numeric = getfn;
mc->set_fn.set_numeric = setfn;
listAddNodeTail(module->module_configs, mc);
unsigned int numeric_flags = maskModuleNumericConfigFlags(flags);
flags = maskModuleConfigFlags(flags);
addModuleNumericConfig(module->name, name, flags, new_config, default_val, numeric_flags, min, max);
unsigned int cflags = maskModuleConfigFlags(flags);
addModuleNumericConfig(sdsdup(mc->name), (mc->alias) ? sdsdup(mc->alias) : NULL,
cflags, mc, default_val, numeric_flags, min, max);
return REDISMODULE_OK;
}
@ -13184,7 +13436,7 @@ NULL
} else if (!strcasecmp(subcmd,"unload") && c->argc == 3) {
const char *errmsg = NULL;
if (moduleUnload(c->argv[2]->ptr, &errmsg) == C_OK)
if (moduleUnload(c->argv[2]->ptr, &errmsg, 0) == C_OK)
addReply(c,shared.ok);
else {
if (errmsg == NULL) errmsg = "operation not possible.";
@ -13826,6 +14078,7 @@ void moduleRegisterCoreAPI(void) {
REGISTER_API(ZsetRangeEndReached);
REGISTER_API(HashSet);
REGISTER_API(HashGet);
REGISTER_API(HashFieldMinExpire);
REGISTER_API(StreamAdd);
REGISTER_API(StreamDelete);
REGISTER_API(StreamIteratorStart);
@ -14024,6 +14277,7 @@ void moduleRegisterCoreAPI(void) {
REGISTER_API(GetModuleUserFromUserName);
REGISTER_API(ACLCheckCommandPermissions);
REGISTER_API(ACLCheckKeyPermissions);
REGISTER_API(ACLCheckKeyPrefixPermissions);
REGISTER_API(ACLCheckChannelPermissions);
REGISTER_API(ACLAddLogEntry);
REGISTER_API(ACLAddLogEntryByUserName);

View File

@ -355,7 +355,12 @@ int isWatchedKeyExpired(client *c) {
}
/* "Touch" a key, so that if this key is being WATCHed by some client the
* next EXEC will fail. */
* next EXEC will fail.
*
* Sanitizer suppression: IO threads also read c->flags, but never modify
* it or read the CLIENT_DIRTY_CAS bit, main thread just only modifies
* this bit, so there is actually no real data race. */
REDIS_NO_SANITIZE("thread")
void touchWatchedKey(redisDb *db, robj *key) {
list *clients;
listIter li;
@ -404,6 +409,7 @@ void touchWatchedKey(redisDb *db, robj *key) {
* replaced_with: for SWAPDB, the WATCH should be invalidated if
* the key exists in either of them, and skipped only if it
* doesn't exist in both. */
REDIS_NO_SANITIZE("thread")
void touchAllWatchedKeysInDb(redisDb *emptied, redisDb *replaced_with) {
listIter li;
listNode *ln;

File diff suppressed because it is too large Load Diff

View File

@ -9,7 +9,7 @@
#include "server.h"
/* This file implements keyspace events notification via Pub/Sub and
* described at https://redis.io/topics/notifications. */
* described at https://redis.io/docs/latest/develop/use/keyspace-notifications/. */
/* Turn a string representing notification classes into an integer
* representing notification classes flags xored.

View File

@ -680,6 +680,18 @@ robj *tryObjectEncoding(robj *o) {
return tryObjectEncodingEx(o, 1);
}
size_t getObjectLength(robj *o) {
switch(o->type) {
case OBJ_STRING: return stringObjectLen(o);
case OBJ_LIST: return listTypeLength(o);
case OBJ_SET: return setTypeSize(o);
case OBJ_ZSET: return zsetLength(o);
case OBJ_HASH: return hashTypeLength(o, 0);
case OBJ_STREAM: return streamLength(o);
default: return 0;
}
}
/* Get a decoded version of an encoded object (returned as a new object).
* If the object is already raw-encoded just increment the ref count. */
robj *getDecodedObject(robj *o) {
@ -1218,12 +1230,16 @@ struct redisMemOverhead *getMemoryOverheadData(void) {
mh->aof_buffer = mem;
mem_total+=mem;
mem = evalScriptsMemory();
mh->lua_caches = mem;
mem = evalScriptsMemoryEngine();
mh->eval_caches = mem;
mem_total+=mem;
mh->functions_caches = functionsMemoryOverhead();
mh->functions_caches = functionsMemoryEngine();
mem_total+=mh->functions_caches;
mh->script_vm = evalScriptsMemoryVM();
mh->script_vm += functionsMemoryVM();
mem_total+=mh->script_vm;
for (j = 0; j < server.dbnum; j++) {
redisDb *db = server.db+j;
if (!kvstoreNumAllocatedDicts(db->keys)) continue;
@ -1544,7 +1560,7 @@ NULL
} else if (!strcasecmp(c->argv[1]->ptr,"stats") && c->argc == 2) {
struct redisMemOverhead *mh = getMemoryOverheadData();
addReplyMapLen(c,31+mh->num_dbs);
addReplyMapLen(c,32+mh->num_dbs);
addReplyBulkCString(c,"peak.allocated");
addReplyLongLong(c,mh->peak_allocated);
@ -1571,11 +1587,14 @@ NULL
addReplyLongLong(c,mh->aof_buffer);
addReplyBulkCString(c,"lua.caches");
addReplyLongLong(c,mh->lua_caches);
addReplyLongLong(c,mh->eval_caches);
addReplyBulkCString(c,"functions.caches");
addReplyLongLong(c,mh->functions_caches);
addReplyBulkCString(c,"script.VMs");
addReplyLongLong(c,mh->script_vm);
for (size_t j = 0; j < mh->num_dbs; j++) {
char dbname[32];
snprintf(dbname,sizeof(dbname),"db.%zd",mh->db[j].dbid);

View File

@ -1244,10 +1244,17 @@ int quicklistDelRange(quicklist *quicklist, const long start,
/* compare between a two entries */
int quicklistCompare(quicklistEntry* entry, unsigned char *p2, const size_t p2_len) {
if (unlikely(QL_NODE_IS_PLAIN(entry->node))) {
if (entry->value) {
return ((entry->sz == p2_len) && (memcmp(entry->value, p2, p2_len) == 0));
} else {
/* We use string2ll() to get an integer representation of the
* string 'p2' and compare it to 'entry->longval', it's much
* faster than convert integer to string and comparing. */
long long sval;
if (string2ll((const char*)p2, p2_len, &sval))
return entry->longval == sval;
}
return lpCompare(entry->zi, p2, p2_len);
return 0;
}
/* Returns a quicklist iterator 'iter'. After the initialization every
@ -2119,7 +2126,7 @@ int quicklistTest(int argc, char *argv[], int flags) {
quicklistRelease(ql);
}
TEST("Comprassion Plain node") {
TEST("Compression Plain node") {
for (int f = 0; f < fill_count; f++) {
size_t large_limit = (fills[f] < 0) ? quicklistNodeNegFillLimit(fills[f]) + 1 : SIZE_SAFETY_LIMIT + 1;
@ -3294,7 +3301,7 @@ int quicklistTest(int argc, char *argv[], int flags) {
}
#if ULONG_MAX >= 0xffffffffffffffff
TEST("compress and decomress quicklist plain node large than UINT32_MAX") {
TEST("compress and decompress quicklist plain node larger than UINT32_MAX") {
size_t sz = (1ull << 32);
unsigned char *s = zmalloc(sz);
randstring(s, sz);

View File

@ -2,8 +2,13 @@
* Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved.
*
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
*
* Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
*/
#include "server.h"
@ -2332,6 +2337,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int dbid, int *error)
rdbReportCorruptRDB("invalid expireAt time: %llu",
(unsigned long long) expireAt);
decrRefCount(o);
if (dupSearchDict != NULL) dictRelease(dupSearchDict);
return NULL;
}
@ -3809,8 +3815,10 @@ static void backgroundSaveDoneHandlerSocket(int exitcode, int bysignal) {
}
if (server.rdb_child_exit_pipe!=-1)
close(server.rdb_child_exit_pipe);
if (server.rdb_pipe_read != -1) {
aeDeleteFileEvent(server.el, server.rdb_pipe_read, AE_READABLE);
close(server.rdb_pipe_read);
}
server.rdb_child_exit_pipe = -1;
server.rdb_pipe_read = -1;
zfree(server.rdb_pipe_conns);
@ -3874,7 +3882,8 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
listNode *ln;
listIter li;
pid_t childpid;
int pipefds[2], rdb_pipe_write, safe_to_exit_pipe;
int pipefds[2], rdb_pipe_write = 0, safe_to_exit_pipe = 0;
int rdb_channel = (req & SLAVE_REQ_RDB_CHANNEL);
if (hasActiveChildProcess()) return C_ERR;
@ -3882,6 +3891,7 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
* drained the pipe. */
if (server.rdb_pipe_conns) return C_ERR;
if (!rdb_channel) {
/* Before to fork, create a pipe that is used to transfer the rdb bytes to
* the parent, we can't let it write directly to the sockets, since in case
* of TLS we must let the parent handle a continuous TLS state when the
@ -3899,12 +3909,12 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
}
safe_to_exit_pipe = pipefds[0]; /* read end */
server.rdb_child_exit_pipe = pipefds[1]; /* write end */
}
/* Collect the connections of the replicas we want to transfer
* the RDB to, which are i WAIT_BGSAVE_START state. */
server.rdb_pipe_conns = zmalloc(sizeof(connection *)*listLength(server.slaves));
server.rdb_pipe_numconns = 0;
server.rdb_pipe_numconns_writing = 0;
* the RDB to, which are in WAIT_BGSAVE_START state. */
int numconns = 0;
connection **conns = zmalloc(sizeof(*conns) * listLength(server.slaves));
listRewind(server.slaves,&li);
while((ln = listNext(&li))) {
client *slave = ln->value;
@ -3912,10 +3922,21 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
/* Check slave has the exact requirements */
if (slave->slave_req != req)
continue;
server.rdb_pipe_conns[server.rdb_pipe_numconns++] = slave->conn;
replicationSetupSlaveForFullResync(slave,getPsyncInitialOffset());
replicationSetupSlaveForFullResync(slave, getPsyncInitialOffset());
conns[numconns++] = slave->conn;
if (rdb_channel) {
/* Put the socket in blocking mode to simplify RDB transfer. */
connSendTimeout(slave->conn, server.repl_timeout * 1000);
connBlock(slave->conn);
}
}
}
if (!rdb_channel) {
server.rdb_pipe_conns = conns;
server.rdb_pipe_numconns = numconns;
server.rdb_pipe_numconns_writing = 0;
}
/* Create the child process. */
if ((childpid = redisFork(CHILD_TYPE_RDB)) == 0) {
@ -3923,11 +3944,14 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
int retval, dummy;
rio rdb;
if (rdb_channel) {
rioInitWithConnset(&rdb, conns, numconns);
} else {
rioInitWithFd(&rdb,rdb_pipe_write);
/* Close the reading part, so that if the parent crashes, the child will
* get a write error and exit. */
/* Close the reading part, so that if the parent crashes, the child
* will get a write error and exit. */
close(server.rdb_pipe_read);
}
redisSetProcTitle("redis-rdb-to-slaves");
redisSetCpuAffinity(server.bgsave_cpulist);
@ -3940,6 +3964,9 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
sendChildCowInfo(CHILD_INFO_TYPE_RDB_COW_SIZE, "RDB");
}
if (rdb_channel) {
rioFreeConnset(&rdb);
} else {
rioFreeFd(&rdb);
/* wake up the reader, tell it we're done. */
close(rdb_pipe_write);
@ -3948,6 +3975,8 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
* to read anything, just get the error when the pipe is closed. */
dummy = read(safe_to_exit_pipe, pipefds, 1);
UNUSED(dummy);
}
zfree(conns);
exitFromChild((retval == C_OK) ? 0 : 1);
} else {
/* Parent */
@ -3965,6 +3994,8 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
slave->replstate = SLAVE_STATE_WAIT_BGSAVE_START;
}
}
if (!rdb_channel) {
close(rdb_pipe_write);
close(server.rdb_pipe_read);
close(server.rdb_child_exit_pipe);
@ -3972,17 +4003,24 @@ int rdbSaveToSlavesSockets(int req, rdbSaveInfo *rsi) {
server.rdb_pipe_conns = NULL;
server.rdb_pipe_numconns = 0;
server.rdb_pipe_numconns_writing = 0;
}
} else {
serverLog(LL_NOTICE,"Background RDB transfer started by pid %ld",
(long) childpid);
serverLog(LL_NOTICE, "Background RDB transfer started by pid %ld to %s", (long)childpid,
rdb_channel ? "replica socket" : "parent process pipe");
server.rdb_save_time_start = time(NULL);
server.rdb_child_type = RDB_CHILD_TYPE_SOCKET;
if (!rdb_channel) {
close(rdb_pipe_write); /* close write in parent so that it can detect the close on the child. */
if (aeCreateFileEvent(server.el, server.rdb_pipe_read, AE_READABLE, rdbPipeReadHandler,NULL) == AE_ERR) {
serverPanic("Unrecoverable error creating server.rdb_pipe_read file event.");
}
}
}
if (rdb_channel)
zfree(conns);
else
close(safe_to_exit_pipe);
return (childpid == -1) ? C_ERR : C_OK;
}
return C_OK; /* Unreached. */

View File

@ -121,6 +121,7 @@ typedef long long ustime_t;
#define REDISMODULE_HASH_CFIELDS (1<<2)
#define REDISMODULE_HASH_EXISTS (1<<3)
#define REDISMODULE_HASH_COUNT_ALL (1<<4)
#define REDISMODULE_HASH_EXPIRE_TIME (1<<5)
#define REDISMODULE_CONFIG_DEFAULT 0 /* This is the default for a module config. */
#define REDISMODULE_CONFIG_IMMUTABLE (1ULL<<0) /* Can this value only be set at startup? */
@ -131,6 +132,7 @@ typedef long long ustime_t;
#define REDISMODULE_CONFIG_MEMORY (1ULL<<7) /* Indicates if this value can be set as a memory value */
#define REDISMODULE_CONFIG_BITFLAGS (1ULL<<8) /* Indicates if this value can be set as a multiple enum values */
#define REDISMODULE_CONFIG_UNPREFIXED (1ULL<<9) /* Provided configuration name won't be prefixed with the module name */
/* StreamID type. */
typedef struct RedisModuleStreamID {
@ -1082,6 +1084,7 @@ REDISMODULE_API int (*RedisModule_ZsetRangePrev)(RedisModuleKey *key) REDISMODUL
REDISMODULE_API int (*RedisModule_ZsetRangeEndReached)(RedisModuleKey *key) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_HashSet)(RedisModuleKey *key, int flags, ...) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_HashGet)(RedisModuleKey *key, int flags, ...) REDISMODULE_ATTR;
REDISMODULE_API mstime_t (*RedisModule_HashFieldMinExpire)(RedisModuleKey *key) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StreamAdd)(RedisModuleKey *key, int flags, RedisModuleStreamID *id, RedisModuleString **argv, int64_t numfields) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StreamDelete)(RedisModuleKey *key, RedisModuleStreamID *id) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_StreamIteratorStart)(RedisModuleKey *key, int flags, RedisModuleStreamID *startid, RedisModuleStreamID *endid) REDISMODULE_ATTR;
@ -1287,6 +1290,7 @@ REDISMODULE_API RedisModuleString * (*RedisModule_GetCurrentUserName)(RedisModul
REDISMODULE_API RedisModuleUser * (*RedisModule_GetModuleUserFromUserName)(RedisModuleString *name) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_ACLCheckCommandPermissions)(RedisModuleUser *user, RedisModuleString **argv, int argc) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_ACLCheckKeyPermissions)(RedisModuleUser *user, RedisModuleString *key, int flags) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_ACLCheckKeyPrefixPermissions)(RedisModuleUser *user, RedisModuleString *prefix, int flags) REDISMODULE_ATTR;
REDISMODULE_API int (*RedisModule_ACLCheckChannelPermissions)(RedisModuleUser *user, RedisModuleString *ch, int literal) REDISMODULE_ATTR;
REDISMODULE_API void (*RedisModule_ACLAddLogEntry)(RedisModuleCtx *ctx, RedisModuleUser *user, RedisModuleString *object, RedisModuleACLLogEntryReason reason) REDISMODULE_ATTR;
REDISMODULE_API void (*RedisModule_ACLAddLogEntryByUserName)(RedisModuleCtx *ctx, RedisModuleString *user, RedisModuleString *object, RedisModuleACLLogEntryReason reason) REDISMODULE_ATTR;
@ -1451,6 +1455,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int
REDISMODULE_GET_API(ZsetRangeEndReached);
REDISMODULE_GET_API(HashSet);
REDISMODULE_GET_API(HashGet);
REDISMODULE_GET_API(HashFieldMinExpire);
REDISMODULE_GET_API(StreamAdd);
REDISMODULE_GET_API(StreamDelete);
REDISMODULE_GET_API(StreamIteratorStart);
@ -1656,6 +1661,7 @@ static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int
REDISMODULE_GET_API(GetModuleUserFromUserName);
REDISMODULE_GET_API(ACLCheckCommandPermissions);
REDISMODULE_GET_API(ACLCheckKeyPermissions);
REDISMODULE_GET_API(ACLCheckKeyPrefixPermissions);
REDISMODULE_GET_API(ACLCheckChannelPermissions);
REDISMODULE_GET_API(ACLAddLogEntry);
REDISMODULE_GET_API(ACLAddLogEntryByUserName);

File diff suppressed because it is too large Load Diff

174
src/rio.c
View File

@ -1,3 +1,16 @@
/*
* Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved.
*
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
*
* Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
*/
/* rio.c is a simple stream-oriented I/O abstraction that provides an interface
* to write code that can consume/produce data using different concrete input
* and output devices. For instance the same rdb.c code using the rio
@ -14,34 +27,6 @@
* for the current checksum.
*
* ----------------------------------------------------------------------------
*
* Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
* Copyright (c) 2009-current, Redis Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
@ -429,6 +414,139 @@ void rioFreeFd(rio *r) {
sdsfree(r->io.fd.buf);
}
/* ------------------- Connection set implementation ------------------
* This target is used to write the RDB file to a set of replica connections as
* part of rdb channel replication. */
/* Returns 1 for success, 0 for failure.
* The function returns success as long as we are able to correctly write
* to at least one file descriptor.
*
* When buf is NULL or len is 0, the function performs a flush operation if
* there is some pending buffer, so this function is also used in order to
* implement rioConnsetFlush(). */
static size_t rioConnsetWrite(rio *r, const void *buf, size_t len) {
const size_t pre_flush_size = 256 * 1024;
unsigned char *p = (unsigned char*) buf;
size_t buflen = len;
size_t failed = 0; /* number of connections that write() returned error. */
/* For small writes, we rather keep the data in user-space buffer, and flush
* it only when it grows. however for larger writes, we prefer to flush
* any pre-existing buffer, and write the new one directly without reallocs
* and memory copying. */
if (len > pre_flush_size) {
rioConnsetWrite(r, NULL, 0);
} else {
if (buf && len) {
r->io.connset.buf = sdscatlen(r->io.connset.buf, buf, len);
if (sdslen(r->io.connset.buf) <= PROTO_IOBUF_LEN)
return 1;
}
p = (unsigned char *)r->io.connset.buf;
buflen = sdslen(r->io.connset.buf);
}
while (buflen > 0) {
/* Write in little chunks so that when there are big writes we
* parallelize while the kernel is sending data in background to the
* TCP socket. */
size_t limit = PROTO_IOBUF_LEN * 2;
size_t count = buflen < limit ? buflen : limit;
for (size_t i = 0; i < r->io.connset.n_dst; i++) {
size_t n_written = 0;
if (r->io.connset.dst[i].failed != 0) {
failed++;
continue; /* Skip failed connections. */
}
do {
ssize_t ret;
connection *c = r->io.connset.dst[i].conn;
ret = connWrite(c, p + n_written, count - n_written);
if (ret <= 0) {
if (errno == 0)
errno = EIO;
/* With blocking sockets, which is the sole user of this
* rio target, EWOULDBLOCK is returned only because of
* the SO_SNDTIMEO socket option, so we translate the error
* into one more recognizable by the user. */
if (ret == -1 && errno == EWOULDBLOCK)
errno = ETIMEDOUT;
r->io.connset.dst[i].failed = 1;
break;
}
n_written += ret;
} while (n_written != count);
}
if (failed == r->io.connset.n_dst)
return 0; /* All the connections have failed. */
p += count;
buflen -= count;
r->io.connset.pos += count;
}
sdsclear(r->io.connset.buf);
return 1;
}
/* Returns 1 or 0 for success/failure. */
static size_t rioConnsetRead(rio *r, void *buf, size_t len) {
UNUSED(r);
UNUSED(buf);
UNUSED(len);
return 0; /* Error, this target does not support reading. */
}
/* Returns the number of sent bytes. */
static off_t rioConnsetTell(rio *r) {
return r->io.connset.pos;
}
/* Flushes any buffer to target device if applicable. Returns 1 on success
* and 0 on failures. */
static int rioConnsetFlush(rio *r) {
/* Our flush is implemented by the write method, that recognizes a
* buffer set to NULL with a count of zero as a flush request. */
return rioConnsetWrite(r, NULL, 0);
}
static const rio rioConnsetIO = {
rioConnsetRead,
rioConnsetWrite,
rioConnsetTell,
rioConnsetFlush,
NULL, /* update_checksum */
0, /* current checksum */
0, /* flags */
0, /* bytes read or written */
0, /* read/write chunk size */
{ { NULL, 0 } } /* union for io-specific vars */
};
void rioInitWithConnset(rio *r, connection **conns, size_t n_conns) {
*r = rioConnsetIO;
r->io.connset.dst = zcalloc(sizeof(*r->io.connset.dst) * n_conns);
r->io.connset.n_dst = n_conns;
r->io.connset.pos = 0;
r->io.connset.buf = sdsempty();
for (size_t i = 0; i < n_conns; i++)
r->io.connset.dst[i].conn = conns[i];
}
/* release the rio stream. */
void rioFreeConnset(rio *r) {
zfree(r->io.connset.dst);
sdsfree(r->io.connset.buf);
}
/* ---------------------------- Generic functions ---------------------------- */
/* This function can be installed both in memory and file streams when checksum

View File

@ -1,31 +1,14 @@
/*
* Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
* Copyright (c) 2009-current, Redis Ltd.
* Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Redis nor the names of its contributors may be used
* to endorse or promote products derived from this software without
* specific prior written permission.
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
*/
@ -39,6 +22,7 @@
#define RIO_FLAG_READ_ERROR (1<<0)
#define RIO_FLAG_WRITE_ERROR (1<<1)
#define RIO_FLAG_ABORT (1<<2)
#define RIO_TYPE_FILE (1<<0)
#define RIO_TYPE_BUFFER (1<<1)
@ -97,6 +81,17 @@ struct _rio {
off_t pos;
sds buf;
} fd;
/* Multiple connections target (used to write to N sockets). */
struct {
struct {
connection *conn; /* Connection */
int failed; /* If write failed on this connection. */
} *dst;
size_t n_dst; /* Number of connections */
off_t pos; /* Number of sent bytes */
sds buf;
} connset;
} io;
};
@ -107,7 +102,7 @@ typedef struct _rio rio;
* if needed. */
static inline size_t rioWrite(rio *r, const void *buf, size_t len) {
if (r->flags & RIO_FLAG_WRITE_ERROR) return 0;
if (r->flags & (RIO_FLAG_WRITE_ERROR | RIO_FLAG_ABORT)) return 0;
while (len) {
size_t bytes_to_write = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len;
if (r->update_cksum) r->update_cksum(r,buf,bytes_to_write);
@ -123,7 +118,7 @@ static inline size_t rioWrite(rio *r, const void *buf, size_t len) {
}
static inline size_t rioRead(rio *r, void *buf, size_t len) {
if (r->flags & RIO_FLAG_READ_ERROR) return 0;
if (r->flags & (RIO_FLAG_READ_ERROR | RIO_FLAG_ABORT)) return 0;
while (len) {
size_t bytes_to_read = (r->max_processing_chunk && r->max_processing_chunk < len) ? r->max_processing_chunk : len;
if (r->read(r,buf,bytes_to_read) == 0) {
@ -146,6 +141,10 @@ static inline int rioFlush(rio *r) {
return r->flush(r);
}
static inline void rioAbort(rio *r) {
r->flags |= RIO_FLAG_ABORT;
}
/* This function allows to know if there was a read error in any past
* operation, since the rio stream was created or since the last call
* to rioClearError(). */
@ -159,16 +158,18 @@ static inline int rioGetWriteError(rio *r) {
}
static inline void rioClearErrors(rio *r) {
r->flags &= ~(RIO_FLAG_READ_ERROR|RIO_FLAG_WRITE_ERROR);
r->flags &= ~(RIO_FLAG_READ_ERROR|RIO_FLAG_WRITE_ERROR|RIO_FLAG_ABORT);
}
void rioInitWithFile(rio *r, FILE *fp);
void rioInitWithBuffer(rio *r, sds s);
void rioInitWithConn(rio *r, connection *conn, size_t read_limit);
void rioInitWithFd(rio *r, int fd);
void rioInitWithConnset(rio *r, connection **conns, size_t n_conns);
void rioFreeFd(rio *r);
void rioFreeConn(rio *r, sds* out_remainingBufferedData);
void rioFreeConnset(rio *r);
size_t rioWriteBulkCount(rio *r, char prefix, long count);
size_t rioWriteBulkString(rio *r, const char *buf, size_t len);

View File

@ -64,7 +64,7 @@ lua_State *createLuaState(void) {
size_t sz = sizeof(unsigned int);
int err = je_mallctl("tcache.create", (void *)&tcache, &sz, NULL, 0);
if (err) {
serverLog(LL_WARNING, "Failed creating the lua jemalloc tcache.");
serverLog(LL_WARNING, "Failed creating the lua jemalloc tcache (err=%d).", err);
exit(1);
}
@ -79,7 +79,7 @@ void luaEnvInit(void) {
size_t sz = sizeof(unsigned int);
int err = je_mallctl("arenas.create", (void *)&arena, &sz, NULL, 0);
if (err) {
serverLog(LL_WARNING, "Failed creating the lua jemalloc arena.");
serverLog(LL_WARNING, "Failed creating the lua jemalloc arena (err=%d).", err);
exit(1);
}
server.lua_arena = arena;

View File

@ -174,6 +174,11 @@ void sdsfree(sds s) {
s_free((char*)s-sdsHdrSize(s[-1]));
}
/* Generic version of sdsfree. */
void sdsfreegeneric(void *s) {
sdsfree((sds)s);
}
/* Set the sds string length to the length as obtained with strlen(), so
* considering as content only up to the first null term character.
*
@ -1443,29 +1448,29 @@ int sdsTest(int argc, char **argv, int flags) {
/* Test sdsresize - extend */
x = sdsnew("1234567890123456789012345678901234567890");
x = sdsResize(x, 200, 1);
test_cond("sdsrezie() expand len", sdslen(x) == 40);
test_cond("sdsrezie() expand strlen", strlen(x) == 40);
test_cond("sdsrezie() expand alloc", sdsalloc(x) == 200);
test_cond("sdsresize() expand len", sdslen(x) == 40);
test_cond("sdsresize() expand strlen", strlen(x) == 40);
test_cond("sdsresize() expand alloc", sdsalloc(x) == 200);
/* Test sdsresize - trim free space */
x = sdsResize(x, 80, 1);
test_cond("sdsrezie() shrink len", sdslen(x) == 40);
test_cond("sdsrezie() shrink strlen", strlen(x) == 40);
test_cond("sdsrezie() shrink alloc", sdsalloc(x) == 80);
test_cond("sdsresize() shrink len", sdslen(x) == 40);
test_cond("sdsresize() shrink strlen", strlen(x) == 40);
test_cond("sdsresize() shrink alloc", sdsalloc(x) == 80);
/* Test sdsresize - crop used space */
x = sdsResize(x, 30, 1);
test_cond("sdsrezie() crop len", sdslen(x) == 30);
test_cond("sdsrezie() crop strlen", strlen(x) == 30);
test_cond("sdsrezie() crop alloc", sdsalloc(x) == 30);
test_cond("sdsresize() crop len", sdslen(x) == 30);
test_cond("sdsresize() crop strlen", strlen(x) == 30);
test_cond("sdsresize() crop alloc", sdsalloc(x) == 30);
/* Test sdsresize - extend to different class */
x = sdsResize(x, 400, 1);
test_cond("sdsrezie() expand len", sdslen(x) == 30);
test_cond("sdsrezie() expand strlen", strlen(x) == 30);
test_cond("sdsrezie() expand alloc", sdsalloc(x) == 400);
test_cond("sdsresize() expand len", sdslen(x) == 30);
test_cond("sdsresize() expand strlen", strlen(x) == 30);
test_cond("sdsresize() expand alloc", sdsalloc(x) == 400);
/* Test sdsresize - shrink to different class */
x = sdsResize(x, 4, 1);
test_cond("sdsrezie() crop len", sdslen(x) == 4);
test_cond("sdsrezie() crop strlen", strlen(x) == 4);
test_cond("sdsrezie() crop alloc", sdsalloc(x) == 4);
test_cond("sdsresize() crop len", sdslen(x) == 4);
test_cond("sdsresize() crop strlen", strlen(x) == 4);
test_cond("sdsresize() crop alloc", sdsalloc(x) == 4);
sdsfree(x);
}
return 0;

View File

@ -198,6 +198,7 @@ sds sdsnew(const char *init);
sds sdsempty(void);
sds sdsdup(const sds s);
void sdsfree(sds s);
void sdsfreegeneric(void *s);
sds sdsgrowzero(sds s, size_t len);
sds sdscatlen(sds s, const void *t, size_t len);
sds sdscat(sds s, const char *t);

View File

@ -521,7 +521,8 @@ dictType commandTableDictType = {
dictSdsKeyCaseCompare, /* key compare */
dictSdsDestructor, /* key destructor */
NULL, /* val destructor */
NULL /* allow to expand */
NULL, /* allow to expand */
.force_full_rehash = 1, /* force full rehashing */
};
/* Hash type hash table (note that small hashes are represented with listpacks) */
@ -636,7 +637,8 @@ dictType clientDictType = {
NULL, /* key dup */
NULL, /* val dup */
dictClientKeyCompare, /* key compare */
.no_value = 1 /* no values in this dict */
.no_value = 1, /* no values in this dict */
.keys_are_odd = 0 /* a client pointer is not an odd pointer */
};
/* This function is called once a background process of some kind terminates,
@ -784,6 +786,23 @@ int clientsCronResizeQueryBuffer(client *c) {
return 0;
}
/* If the client has been idle for too long, free the client's arguments. */
int clientsCronFreeArgvIfIdle(client *c) {
/* If the client is in the middle of parsing a command, or if argv is in use
* (e.g. parsed in the IO thread but not yet executed, or blocked), exit ASAP. */
if (!c->argv || c->multibulklen || c->argc) return 0;
/* Free argv if the client has been idle for more than 2 seconds or if argv
* size is too large. */
time_t idletime = server.unixtime - c->lastinteraction;
if (idletime > 2 || c->argv_len > 128) {
c->argv_len = 0;
zfree(c->argv);
c->argv = NULL;
}
return 0;
}
/* The client output buffer can be adjusted to better fit the memory requirements.
*
* the logic is:
@ -949,7 +968,7 @@ void removeClientFromMemUsageBucket(client *c, int allow_eviction) {
* returns 1 if client eviction for this client is allowed, 0 otherwise.
*/
int updateClientMemUsageAndBucket(client *c) {
serverAssert(io_threads_op == IO_THREADS_OP_IDLE && c->conn);
serverAssert(pthread_equal(pthread_self(), server.main_thread_id) && c->conn);
int allow_eviction = clientEvictionAllowed(c);
removeClientFromMemUsageBucket(c, allow_eviction);
@ -1001,6 +1020,7 @@ void getExpansiveClientsInfo(size_t *in_usage, size_t *out_usage) {
* default server.hz value is 10, so sometimes here we need to process thousands
* of clients per second, turning this function into a source of latency.
*/
#define CLIENTS_CRON_PAUSE_IOTHREAD 8
#define CLIENTS_CRON_MIN_ITERATIONS 5
void clientsCron(void) {
/* Try to process at least numclients/server.hz of clients
@ -1035,6 +1055,15 @@ void clientsCron(void) {
ClientsPeakMemInput[zeroidx] = 0;
ClientsPeakMemOutput[zeroidx] = 0;
/* Pause the IO threads that are processing clients, to let us access clients
* safely. In order to avoid increasing CPU usage by pausing all threads when
* there are too many io threads, we pause io threads in multiple batches. */
static int start = 1, end = 0;
if (server.io_threads_num >= 1 && listLength(server.clients) > 0) {
end = start + CLIENTS_CRON_PAUSE_IOTHREAD - 1;
if (end >= server.io_threads_num) end = server.io_threads_num - 1;
pauseIOThreadsRange(start, end);
}
while(listLength(server.clients) && iterations--) {
client *c;
@ -1045,11 +1074,21 @@ void clientsCron(void) {
head = listFirst(server.clients);
c = listNodeValue(head);
listRotateHeadToTail(server.clients);
if (c->running_tid != IOTHREAD_MAIN_THREAD_ID &&
!(c->running_tid >= start && c->running_tid <= end))
{
/* Skip clients that are being processed by the IO threads that
* are not paused. */
continue;
}
/* The following functions do different service checks on the client.
* The protocol is that they return non-zero if the client was
* terminated. */
if (clientsCronHandleTimeout(c,now)) continue;
if (clientsCronResizeQueryBuffer(c)) continue;
if (clientsCronFreeArgvIfIdle(c)) continue;
if (clientsCronResizeOutputBuffer(c,now)) continue;
if (clientsCronTrackExpansiveClients(c, curr_peak_mem_usage_slot)) continue;
@ -1065,6 +1104,14 @@ void clientsCron(void) {
if (closeClientOnOutputBufferLimitReached(c, 0)) continue;
}
/* Resume the IO threads that were paused */
if (end) {
resumeIOThreadsRange(start, end);
start = end + 1;
if (start >= server.io_threads_num) start = 1;
end = 0;
}
}
/* This function handles 'background' operations we are required to do
@ -1397,7 +1444,7 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
serverLog(LL_DEBUG,
"%lu clients connected (%lu replicas), %zu bytes in use",
listLength(server.clients)-listLength(server.slaves),
listLength(server.slaves),
replicationLogicalReplicaCount(),
zmalloc_used_memory());
}
}
@ -1513,9 +1560,6 @@ int serverCron(struct aeEventLoop *eventLoop, long long id, void *clientData) {
migrateCloseTimedoutSockets();
}
/* Stop the I/O threads if we don't have enough pending work. */
stopThreadedIOIfNeeded();
/* Resize tracking keys table if needed. This is also done at every
* command execution, but we want to be sure that if the last command
* executed changes the value via CONFIG SET, the server will perform
@ -1667,24 +1711,28 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
* events to handle. */
if (ProcessingEventsWhileBlocked) {
uint64_t processed = 0;
processed += handleClientsWithPendingReadsUsingThreads();
processed += connTypeProcessPendingData();
processed += connTypeProcessPendingData(server.el);
if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE)
flushAppendOnlyFile(0);
processed += handleClientsWithPendingWrites();
processed += freeClientsInAsyncFreeQueue();
/* Let the clients after the blocking call be processed. */
processClientsOfAllIOThreads();
/* New connections may have been established while blocked, clients from
* IO thread may have replies to write, ensure they are promptly sent to
* IO threads. */
processed += sendPendingClientsToIOThreads();
server.events_processed_while_blocked += processed;
return;
}
/* We should handle pending reads clients ASAP after event loop. */
handleClientsWithPendingReadsUsingThreads();
/* Handle pending data(typical TLS). (must be done before flushAppendOnlyFile) */
connTypeProcessPendingData();
connTypeProcessPendingData(server.el);
/* If any connection type(typical TLS) still has pending unread data don't sleep at all. */
int dont_sleep = connTypeHasPendingData();
int dont_sleep = connTypeHasPendingData(server.el);
/* Call the Redis Cluster before sleep function. Note that this function
* may change the state of Redis Cluster (from ok to fail or vice versa),
@ -1750,8 +1798,8 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
long long prev_fsynced_reploff = server.fsynced_reploff;
/* Write the AOF buffer on disk,
* must be done before handleClientsWithPendingWritesUsingThreads,
* in case of appendfsync=always. */
* must be done before handleClientsWithPendingWrites and
* sendPendingClientsToIOThreads, in case of appendfsync=always. */
if (server.aof_state == AOF_ON || server.aof_state == AOF_WAIT_REWRITE)
flushAppendOnlyFile(0);
@ -1773,7 +1821,10 @@ void beforeSleep(struct aeEventLoop *eventLoop) {
}
/* Handle writes with pending output buffers. */
handleClientsWithPendingWritesUsingThreads();
handleClientsWithPendingWrites();
/* Let io thread to handle its pending clients. */
sendPendingClientsToIOThreads();
/* Record cron time in beforeSleep. This does not include the time consumed by AOF writing and IO writing above. */
monotime cron_start_time_after_write = getMonotonicUs();
@ -2102,6 +2153,7 @@ void initServerConfig(void) {
memset(server.blocked_clients_by_type,0,
sizeof(server.blocked_clients_by_type));
server.shutdown_asap = 0;
server.crashing = 0;
server.shutdown_flags = 0;
server.shutdown_mstime = 0;
server.cluster_module_flags = CLUSTER_MODULE_FLAG_NONE;
@ -2131,6 +2183,8 @@ void initServerConfig(void) {
server.cached_master = NULL;
server.master_initial_offset = -1;
server.repl_state = REPL_STATE_NONE;
server.repl_rdb_ch_state = REPL_RDB_CH_STATE_NONE;
server.repl_full_sync_buffer = (struct replDataBuf) {0};
server.repl_transfer_tmpfile = NULL;
server.repl_transfer_fd = -1;
server.repl_transfer_s = NULL;
@ -2568,10 +2622,10 @@ void resetServerStats(void) {
server.stat_sync_full = 0;
server.stat_sync_partial_ok = 0;
server.stat_sync_partial_err = 0;
server.stat_io_reads_processed = 0;
atomicSet(server.stat_total_reads_processed, 0);
server.stat_io_writes_processed = 0;
atomicSet(server.stat_total_writes_processed, 0);
for (j = 0; j < IO_THREADS_MAX_NUM; j++) {
atomicSet(server.stat_io_reads_processed[j], 0);
atomicSet(server.stat_io_writes_processed[j], 0);
}
atomicSet(server.stat_client_qbuf_limit_disconnections, 0);
server.stat_client_outbuf_limit_disconnections = 0;
for (j = 0; j < STATS_METRIC_COUNT; j++) {
@ -2627,6 +2681,8 @@ void initServer(void) {
server.hz = server.config_hz;
server.pid = getpid();
server.in_fork_child = CHILD_TYPE_NONE;
server.rdb_pipe_read = -1;
server.rdb_child_exit_pipe = -1;
server.main_thread_id = pthread_self();
server.current_client = NULL;
server.errors = raxNew();
@ -2690,7 +2746,7 @@ void initServer(void) {
flags |= KVSTORE_FREE_EMPTY_DICTS;
}
for (j = 0; j < server.dbnum; j++) {
server.db[j].keys = kvstoreCreate(&dbDictType, slot_count_bits, flags);
server.db[j].keys = kvstoreCreate(&dbDictType, slot_count_bits, flags | KVSTORE_ALLOC_META_KEYS_HIST);
server.db[j].expires = kvstoreCreate(&dbExpiresDictType, slot_count_bits, flags);
server.db[j].hexpires = ebCreate();
server.db[j].expires_cursor = 0;
@ -2701,7 +2757,7 @@ void initServer(void) {
server.db[j].id = j;
server.db[j].avg_ttl = 0;
server.db[j].defrag_later = listCreate();
listSetFreeMethod(server.db[j].defrag_later,(void (*)(void*))sdsfree);
listSetFreeMethod(server.db[j].defrag_later, sdsfreegeneric);
}
evictionPoolAlloc(); /* Initialize the LRU keys pool. */
/* Note that server.pubsub_channels was chosen to be a kvstore (with only one dict, which
@ -2763,6 +2819,7 @@ void initServer(void) {
server.aof_last_write_errno = 0;
server.repl_good_slaves_count = 0;
server.last_sig_received = 0;
memset(server.io_threads_clients_num, 0, sizeof(server.io_threads_clients_num));
/* Initiate acl info struct */
server.acl_info.invalid_cmd_accesses = 0;
@ -3920,12 +3977,6 @@ int processCommand(client *c) {
reqresAppendRequest(c);
}
/* Handle possible security attacks. */
if (!strcasecmp(c->argv[0]->ptr,"host:") || !strcasecmp(c->argv[0]->ptr,"post")) {
securityWarningCommand(c);
return C_ERR;
}
/* If we're inside a module blocked context yielding that wants to avoid
* processing clients, postpone the command. */
if (server.busy_module_yield_flags != BUSY_MODULE_YIELD_NONE &&
@ -3940,7 +3991,15 @@ int processCommand(client *c) {
* In case we are reprocessing a command after it was blocked,
* we do not have to repeat the same checks */
if (!client_reprocessing_command) {
c->cmd = c->lastcmd = c->realcmd = lookupCommand(c->argv,c->argc);
struct redisCommand *cmd = c->iolookedcmd ? c->iolookedcmd : lookupCommand(c->argv, c->argc);
if (!cmd) {
/* Handle possible security attacks. */
if (!strcasecmp(c->argv[0]->ptr,"host:") || !strcasecmp(c->argv[0]->ptr,"post")) {
securityWarningCommand(c);
return C_ERR;
}
}
c->cmd = c->lastcmd = c->realcmd = cmd;
sds err;
if (!commandCheckExistence(c, &err)) {
rejectCommandSds(c, err);
@ -5403,7 +5462,10 @@ const char *replstateToString(int replstate) {
switch (replstate) {
case SLAVE_STATE_WAIT_BGSAVE_START:
case SLAVE_STATE_WAIT_BGSAVE_END:
case SLAVE_STATE_WAIT_RDB_CHANNEL:
return "wait_bgsave";
case SLAVE_STATE_SEND_BULK_AND_STREAM:
return "send_bulk_and_stream";
case SLAVE_STATE_SEND_BULK:
return "send_bulk";
case SLAVE_STATE_ONLINE:
@ -5520,8 +5582,8 @@ void releaseInfoSectionDict(dict *sec) {
* The resulting dictionary should be released with releaseInfoSectionDict. */
dict *genInfoSectionDict(robj **argv, int argc, char **defaults, int *out_all, int *out_everything) {
char *default_sections[] = {
"server", "clients", "memory", "persistence", "stats", "replication",
"cpu", "module_list", "errorstats", "cluster", "keyspace", NULL};
"server", "clients", "memory", "persistence", "stats", "replication", "threads",
"cpu", "module_list", "errorstats", "cluster", "keyspace", "keysizes", NULL};
if (!defaults)
defaults = default_sections;
@ -5686,8 +5748,8 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
size_t zmalloc_used = zmalloc_used_memory();
size_t total_system_mem = server.system_memory_size;
const char *evict_policy = evictPolicyToString();
long long memory_lua = evalMemory();
long long memory_functions = functionsMemory();
long long memory_lua = evalScriptsMemoryVM();
long long memory_functions = functionsMemoryVM();
struct redisMemOverhead *mh = getMemoryOverheadData();
/* Peak memory is updated from time to time by serverCron() so it
@ -5702,7 +5764,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
bytesToHuman(total_system_hmem,sizeof(total_system_hmem),total_system_mem);
bytesToHuman(used_memory_lua_hmem,sizeof(used_memory_lua_hmem),memory_lua);
bytesToHuman(used_memory_vm_total_hmem,sizeof(used_memory_vm_total_hmem),memory_functions + memory_lua);
bytesToHuman(used_memory_scripts_hmem,sizeof(used_memory_scripts_hmem),mh->lua_caches + mh->functions_caches);
bytesToHuman(used_memory_scripts_hmem,sizeof(used_memory_scripts_hmem),mh->eval_caches + mh->functions_caches);
bytesToHuman(used_memory_rss_hmem,sizeof(used_memory_rss_hmem),server.cron_malloc_stats.process_rss);
bytesToHuman(maxmemory_hmem,sizeof(maxmemory_hmem),server.maxmemory);
@ -5728,7 +5790,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
"used_memory_lua:%lld\r\n", memory_lua, /* deprecated, renamed to used_memory_vm_eval */
"used_memory_vm_eval:%lld\r\n", memory_lua,
"used_memory_lua_human:%s\r\n", used_memory_lua_hmem, /* deprecated */
"used_memory_scripts_eval:%lld\r\n", (long long)mh->lua_caches,
"used_memory_scripts_eval:%lld\r\n", (long long)mh->eval_caches,
"number_of_cached_scripts:%lu\r\n", dictSize(evalScriptsDict()),
"number_of_functions:%lu\r\n", functionsNum(),
"number_of_libraries:%lu\r\n", functionsLibNum(),
@ -5736,7 +5798,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
"used_memory_vm_total:%lld\r\n", memory_functions + memory_lua,
"used_memory_vm_total_human:%s\r\n", used_memory_vm_total_hmem,
"used_memory_functions:%lld\r\n", (long long)mh->functions_caches,
"used_memory_scripts:%lld\r\n", (long long)mh->lua_caches + (long long)mh->functions_caches,
"used_memory_scripts:%lld\r\n", (long long)mh->eval_caches + (long long)mh->functions_caches,
"used_memory_scripts_human:%s\r\n", used_memory_scripts_hmem,
"maxmemory:%lld\r\n", server.maxmemory,
"maxmemory_human:%s\r\n", maxmemory_hmem,
@ -5861,9 +5923,29 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
}
}
/* Threads */
int stat_io_ops_processed_calculated = 0;
long long stat_io_reads_processed = 0, stat_io_writes_processed = 0;
long long stat_total_reads_processed = 0, stat_total_writes_processed = 0;
if (all_sections || (dictFind(section_dict,"threads") != NULL)) {
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info, "# Threads\r\n");
long long reads, writes;
for (j = 0; j < server.io_threads_num; j++) {
atomicGet(server.stat_io_reads_processed[j], reads);
atomicGet(server.stat_io_writes_processed[j], writes);
info = sdscatprintf(info, "io_thread_%d:clients=%d,reads=%lld,writes=%lld\r\n",
j, server.io_threads_clients_num[j], reads, writes);
stat_total_reads_processed += reads;
if (j != 0) stat_io_reads_processed += reads; /* Skip the main thread */
stat_total_writes_processed += writes;
if (j != 0) stat_io_writes_processed += writes; /* Skip the main thread */
}
stat_io_ops_processed_calculated = 1;
}
/* Stats */
if (all_sections || (dictFind(section_dict,"stats") != NULL)) {
long long stat_total_reads_processed, stat_total_writes_processed;
long long stat_net_input_bytes, stat_net_output_bytes;
long long stat_net_repl_input_bytes, stat_net_repl_output_bytes;
long long current_eviction_exceeded_time = server.stat_last_eviction_exceeded_time ?
@ -5871,14 +5953,26 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
long long current_active_defrag_time = server.stat_last_active_defrag_time ?
(long long) elapsedUs(server.stat_last_active_defrag_time): 0;
long long stat_client_qbuf_limit_disconnections;
atomicGet(server.stat_total_reads_processed, stat_total_reads_processed);
atomicGet(server.stat_total_writes_processed, stat_total_writes_processed);
atomicGet(server.stat_net_input_bytes, stat_net_input_bytes);
atomicGet(server.stat_net_output_bytes, stat_net_output_bytes);
atomicGet(server.stat_net_repl_input_bytes, stat_net_repl_input_bytes);
atomicGet(server.stat_net_repl_output_bytes, stat_net_repl_output_bytes);
atomicGet(server.stat_client_qbuf_limit_disconnections, stat_client_qbuf_limit_disconnections);
/* If we calculated the total reads and writes in the threads section,
* we don't need to do it again, and also keep the values consistent. */
if (!stat_io_ops_processed_calculated) {
long long reads, writes;
for (j = 0; j < server.io_threads_num; j++) {
atomicGet(server.stat_io_reads_processed[j], reads);
stat_total_reads_processed += reads;
if (j != 0) stat_io_reads_processed += reads; /* Skip the main thread */
atomicGet(server.stat_io_writes_processed[j], writes);
stat_total_writes_processed += writes;
if (j != 0) stat_io_writes_processed += writes; /* Skip the main thread */
}
}
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info, "# Stats\r\n" FMTARGS(
"total_connections_received:%lld\r\n", server.stat_numconnections,
@ -5929,8 +6023,8 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
"dump_payload_sanitizations:%lld\r\n", server.stat_dump_payload_sanitizations,
"total_reads_processed:%lld\r\n", stat_total_reads_processed,
"total_writes_processed:%lld\r\n", stat_total_writes_processed,
"io_threaded_reads_processed:%lld\r\n", server.stat_io_reads_processed,
"io_threaded_writes_processed:%lld\r\n", server.stat_io_writes_processed,
"io_threaded_reads_processed:%lld\r\n", stat_io_reads_processed,
"io_threaded_writes_processed:%lld\r\n", stat_io_writes_processed,
"client_query_buffer_limit_disconnections:%lld\r\n", stat_client_qbuf_limit_disconnections,
"client_output_buffer_limit_disconnections:%lld\r\n", server.stat_client_outbuf_limit_disconnections,
"reply_buffer_shrinks:%lld\r\n", server.stat_reply_buffer_shrinks,
@ -5969,7 +6063,9 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
"master_last_io_seconds_ago:%d\r\n", server.master ? ((int)(server.unixtime-server.master->lastinteraction)) : -1,
"master_sync_in_progress:%d\r\n", server.repl_state == REPL_STATE_TRANSFER,
"slave_read_repl_offset:%lld\r\n", slave_read_repl_offset,
"slave_repl_offset:%lld\r\n", slave_repl_offset));
"slave_repl_offset:%lld\r\n", slave_repl_offset,
"replica_full_sync_buffer_size:%zu\r\n", server.repl_full_sync_buffer.size,
"replica_full_sync_buffer_peak:%zu\r\n", server.repl_full_sync_buffer.peak));
if (server.repl_state == REPL_STATE_TRANSFER) {
double perc = 0;
@ -5998,7 +6094,7 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
info = sdscatprintf(info,
"connected_slaves:%lu\r\n",
listLength(server.slaves));
replicationLogicalReplicaCount());
/* If min-slaves-to-write is active, write the number of slaves
* currently considered 'good'. */
@ -6021,6 +6117,14 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
int port;
long lag = 0;
/* During rdbchannel replication, replica opens two connections.
* These are distinct slaves in server.slaves list from master
* POV. We don't want to list these separately. If a rdbchannel
* replica has an associated main-channel replica in
* server.slaves list, we'll list main channel replica only. */
if (replicationCheckHasMainChannel(slave))
continue;
if (!slaveip) {
if (connAddrPeerName(slave->conn,ip,sizeof(ip),&port) == -1)
continue;
@ -6149,6 +6253,60 @@ sds genRedisInfoString(dict *section_dict, int all_sections, int everything) {
}
}
/* keysizes */
if (all_sections || (dictFind(section_dict,"keysizes") != NULL)) {
if (sections++) info = sdscat(info,"\r\n");
info = sdscatprintf(info, "# Keysizes\r\n");
char *typestr[] = {
[OBJ_STRING] = "distrib_strings_sizes",
[OBJ_LIST] = "distrib_lists_items",
[OBJ_SET] = "distrib_sets_items",
[OBJ_ZSET] = "distrib_zsets_items",
[OBJ_HASH] = "distrib_hashes_items"
};
serverAssert(sizeof(typestr)/sizeof(typestr[0]) == OBJ_TYPE_BASIC_MAX);
for (int dbnum = 0; dbnum < server.dbnum; dbnum++) {
char *expSizeLabels[] = {
"1", "2", "4", "8", "16", "32", "64", "128", "256", "512", /* Byte */
"1K", "2K", "4K", "8K", "16K", "32K", "64K", "128K", "256K", "512K", /* Kilo */
"1M", "2M", "4M", "8M", "16M", "32M", "64M", "128M", "256M", "512M", /* Mega */
"1G", "2G", "4G", "8G", "16G", "32G", "64G", "128G", "256G", "512G", /* Giga */
"1T", "2T", "4T", "8T", "16T", "32T", "64T", "128T", "256T", "512T", /* Tera */
"1P", "2P", "4P", "8P", "16P", "32P", "64P", "128P", "256P", "512P", /* Peta */
"1E", "2E", "4E", "8E" /* Exa */
};
if (kvstoreSize(server.db[dbnum].keys) == 0)
continue;
for (int type = 0; type < OBJ_TYPE_BASIC_MAX; type++) {
uint64_t *kvstoreHist = kvstoreGetMetadata(server.db[dbnum].keys)->keysizes_hist[type];
char buf[10000];
int cnt = 0, buflen = 0;
/* Print histogram to temp buf[]. First bin is garbage */
buflen += snprintf(buf + buflen, sizeof(buf) - buflen, "db%d_%s:", dbnum, typestr[type]);
for (int i = 0; i < MAX_KEYSIZES_BINS; i++) {
if (kvstoreHist[i] == 0)
continue;
int res = snprintf(buf + buflen, sizeof(buf) - buflen,
(cnt == 0) ? "%s=%llu" : ",%s=%llu",
expSizeLabels[i], (unsigned long long) kvstoreHist[i]);
if (res < 0) break;
buflen += res;
cnt += kvstoreHist[i];
}
/* Print the temp buf[] to the info string */
if (cnt) info = sdscatprintf(info, "%s\r\n", buf);
}
}
}
/* Get info from modules.
* Returned when the user asked for "everything", "modules", or a specific module section.
* We're not aware of the module section names here, and we rather avoid the search when we can.

View File

@ -41,10 +41,6 @@
#include <systemd/sd-daemon.h>
#endif
#ifndef static_assert
#define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 1:-1]
#endif
typedef long long mstime_t; /* millisecond time type. */
typedef long long ustime_t; /* microsecond time type. */
@ -65,6 +61,7 @@ typedef long long ustime_t; /* microsecond time type. */
N-elements flat arrays */
#include "rax.h" /* Radix tree */
#include "connection.h" /* Connection abstraction */
#include "eventnotifier.h" /* Event notification */
#define REDISMODULE_CORE 1
typedef struct redisObject robj;
@ -95,6 +92,7 @@ struct hdr_histogram;
/* Error codes */
#define C_OK 0
#define C_ERR -1
#define C_RETRY -2
/* Static server configuration */
#define CONFIG_DEFAULT_HZ 10 /* Time interrupt calls/sec. */
@ -188,6 +186,14 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
/* Hash table parameters */
#define HASHTABLE_MAX_LOAD_FACTOR 1.618 /* Maximum hash table load factor. */
/* Max number of IO threads */
#define IO_THREADS_MAX_NUM 128
/* Main thread id for doing IO work, whatever we enable or disable io thread
* the main thread always does IO work, so we can consider that the main thread
* is the io thread 0. */
#define IOTHREAD_MAIN_THREAD_ID 0
/* Command flags. Please check the definition of struct redisCommand in this file
* for more information about the meaning of every flag. */
#define CMD_WRITE (1ULL<<0)
@ -278,6 +284,7 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
* out to all keys it should cover */
#define CMD_KEY_VARIABLE_FLAGS (1ULL<<10) /* Means that some keys might have
* different flags depending on arguments */
#define CMD_KEY_PREFIX (1ULL<<11) /* Given key represents a prefix of a set of keys */
/* Key flags for when access type is unknown */
#define CMD_KEY_FULL_ACCESS (CMD_KEY_RW | CMD_KEY_ACCESS | CMD_KEY_UPDATE)
@ -388,11 +395,34 @@ extern int configOOMScoreAdjValuesDefaults[CONFIG_OOM_COUNT];
#define CLIENT_MODULE_PREVENT_AOF_PROP (1ULL<<48) /* Module client do not want to propagate to AOF */
#define CLIENT_MODULE_PREVENT_REPL_PROP (1ULL<<49) /* Module client do not want to propagate to replica */
#define CLIENT_REPROCESSING_COMMAND (1ULL<<50) /* The client is re-processing the command. */
#define CLIENT_REUSABLE_QUERYBUFFER (1ULL<<51) /* The client is using the reusable query buffer. */
#define CLIENT_REPL_RDB_CHANNEL (1ULL<<51) /* Client which is used for rdb delivery as part of rdb channel replication */
/* Any flag that does not let optimize FLUSH SYNC to run it in bg as blocking client ASYNC */
#define CLIENT_AVOID_BLOCKING_ASYNC_FLUSH (CLIENT_DENY_BLOCKING|CLIENT_MULTI|CLIENT_LUA_DEBUG|CLIENT_LUA_DEBUG_SYNC|CLIENT_MODULE)
/* Client flags for client IO */
#define CLIENT_IO_READ_ENABLED (1ULL<<0) /* Client can read from socket. */
#define CLIENT_IO_WRITE_ENABLED (1ULL<<1) /* Client can write to socket. */
#define CLIENT_IO_PENDING_COMMAND (1ULL<<2) /* Similar to CLIENT_PENDING_COMMAND. */
#define CLIENT_IO_REUSABLE_QUERYBUFFER (1ULL<<3) /* The client is using the reusable query buffer. */
#define CLIENT_IO_CLOSE_ASAP (1ULL<<4) /* Close this client ASAP in IO thread. */
/* Definitions for client read errors. These error codes are used to indicate
* various issues that can occur while reading or parsing data from a client. */
#define CLIENT_READ_TOO_BIG_INLINE_REQUEST 1
#define CLIENT_READ_UNBALANCED_QUOTES 2
#define CLIENT_READ_MASTER_USING_INLINE_PROTOCAL 3
#define CLIENT_READ_TOO_BIG_MBULK_COUNT_STRING 4
#define CLIENT_READ_TOO_BIG_BUCK_COUNT_STRING 5
#define CLIENT_READ_EXPECTED_DOLLAR 6
#define CLIENT_READ_INVALID_BUCK_LENGTH 7
#define CLIENT_READ_UNAUTH_BUCK_LENGTH 8
#define CLIENT_READ_INVALID_MULTIBUCK_LENGTH 9
#define CLIENT_READ_UNAUTH_MBUCK_COUNT 10
#define CLIENT_READ_CONN_DISCONNECTED 11
#define CLIENT_READ_CONN_CLOSED 12
#define CLIENT_READ_REACHED_MAX_QUERYBUF 13
/* Client block type (btype field in client structure)
* if CLIENT_BLOCKED flag is set. */
typedef enum blocking_type {
@ -445,6 +475,24 @@ typedef enum {
REPL_STATE_CONNECTED, /* Connected to master */
} repl_state;
/* Replica rdb channel replication state. Used in server.repl_rdb_ch_state for
* replicas to remember what to do next. */
typedef enum {
REPL_RDB_CH_STATE_CLOSE_ASAP = -1, /* Async error state */
REPL_RDB_CH_STATE_NONE = 0, /* No active rdb channel sync */
REPL_RDB_CH_SEND_HANDSHAKE, /* Send handshake sequence to master */
REPL_RDB_CH_RECEIVE_AUTH_REPLY, /* Wait for AUTH reply */
REPL_RDB_CH_RECEIVE_REPLCONF_REPLY, /* Wait for REPLCONF reply */
REPL_RDB_CH_RECEIVE_FULLRESYNC, /* Wait for +FULLRESYNC reply */
REPL_RDB_CH_RDB_LOADING, /* Loading rdb using rdb channel */
} repl_rdb_channel_state;
/* Replication debug flags for testing. */
#define REPL_DEBUG_PAUSE_NONE (1 << 0)
#define REPL_DEBUG_AFTER_FORK (1 << 1)
#define REPL_DEBUG_BEFORE_RDB_CHANNEL (1 << 2)
#define REPL_DEBUG_ON_STREAMING_REPL_BUF (1 << 3)
/* The state of an in progress coordinated failover */
typedef enum {
NO_FAILOVER = 0, /* No failover in progress */
@ -463,16 +511,22 @@ typedef enum {
#define SLAVE_STATE_ONLINE 9 /* RDB file transmitted, sending just updates. */
#define SLAVE_STATE_RDB_TRANSMITTED 10 /* RDB file transmitted - This state is used only for
* a replica that only wants RDB without replication buffer */
#define SLAVE_STATE_WAIT_RDB_CHANNEL 11 /* Main channel of replica is connected,
* we are waiting rdbchannel connection to start delivery.*/
#define SLAVE_STATE_SEND_BULK_AND_STREAM 12 /* Main channel of a replica which uses rdb channel replication.
* Sending RDB file and replication stream in parallel. */
/* Slave capabilities. */
#define SLAVE_CAPA_NONE 0
#define SLAVE_CAPA_EOF (1<<0) /* Can parse the RDB EOF streaming format. */
#define SLAVE_CAPA_PSYNC2 (1<<1) /* Supports PSYNC2 protocol. */
#define SLAVE_CAPA_RDB_CHANNEL_REPL (1<<2) /* Supports rdb channel replication during full sync */
/* Slave requirements */
#define SLAVE_REQ_NONE 0
#define SLAVE_REQ_RDB_EXCLUDE_DATA (1 << 0) /* Exclude data from RDB */
#define SLAVE_REQ_RDB_EXCLUDE_FUNCTIONS (1 << 1) /* Exclude functions from RDB */
#define SLAVE_REQ_RDB_CHANNEL (1 << 2) /* Use rdb channel replication */
/* Mask of all bits in the slave requirements bitfield that represent non-standard (filtered) RDB requirements */
#define SLAVE_REQ_RDB_MASK (SLAVE_REQ_RDB_EXCLUDE_DATA | SLAVE_REQ_RDB_EXCLUDE_FUNCTIONS)
@ -581,6 +635,12 @@ typedef enum {
#define SHUTDOWN_NOW 4 /* Don't wait for replicas to catch up. */
#define SHUTDOWN_FORCE 8 /* Don't let errors prevent shutdown. */
/* IO thread pause status */
#define IO_THREAD_UNPAUSED 0
#define IO_THREAD_PAUSING 1
#define IO_THREAD_PAUSED 2
#define IO_THREAD_RESUMING 3
/* Command call flags, see call() function */
#define CMD_CALL_NONE 0
#define CMD_CALL_PROPAGATE_AOF (1<<0)
@ -698,6 +758,7 @@ typedef enum {
#define OBJ_SET 2 /* Set object. */
#define OBJ_ZSET 3 /* Sorted set object. */
#define OBJ_HASH 4 /* Hash object. */
#define OBJ_TYPE_BASIC_MAX 5 /* Max number of basic object types. */
/* The "module" object type is a special one that signals that the object
* is one directly managed by a Redis module. In this case the value points
@ -969,7 +1030,7 @@ typedef struct replBufBlock {
* by integers from 0 (the default database) up to the max configured
* database. The database number is the 'id' field in the structure. */
typedef struct redisDb {
kvstore *keys; /* The keyspace for this DB */
kvstore *keys; /* The keyspace for this DB. As metadata, holds keysizes histogram */
kvstore *expires; /* Timeout of keys with a timeout set */
ebuckets hexpires; /* Hash expiration DS. Single TTL per hash (of next min field to expire) */
dict *blocking_keys; /* Keys with clients waiting for data (BLPOP)*/
@ -1127,6 +1188,23 @@ typedef struct replBacklog {
* byte in the replication backlog buffer.*/
} replBacklog;
/* Used by replDataBuf during rdb channel replication to accumulate replication
* stream on replica side. */
typedef struct replDataBufBlock {
size_t used; /* Used bytes in the buf */
size_t size; /* Size of the buf */
char buf[]; /* Replication data */
} replDataBufBlock;
/* Linked list of replDataBufBlock structs, holds replication stream during
* rdb channel replication on replica side. */
typedef struct replDataBuf {
list *blocks; /* List of replDataBufBlock */
size_t size; /* Total number of bytes available in all blocks. */
size_t used; /* Total number of bytes actually used in all blocks. */
size_t peak; /* Peak number of bytes stored in all blocks. */
} replDataBuf;
typedef struct {
list *clients;
size_t mem_usage_sum;
@ -1161,6 +1239,10 @@ typedef struct client {
uint64_t id; /* Client incremental unique ID. */
uint64_t flags; /* Client flags: CLIENT_* macros. */
connection *conn;
uint8_t tid; /* Thread assigned ID this client is bound to. */
uint8_t running_tid; /* Thread assigned ID this client is running on. */
uint8_t io_flags; /* Accessed by both main and IO threads, but not modified concurrently */
uint8_t read_error; /* Client read error: CLIENT_READ_* macros. */
int resp; /* RESP protocol version. Can be 2 or 3. */
redisDb *db; /* Pointer to currently SELECTed DB. */
robj *name; /* As set by CLIENT SETNAME. */
@ -1176,6 +1258,7 @@ typedef struct client {
robj **original_argv; /* Arguments of original command if arguments were rewritten. */
size_t argv_len_sum; /* Sum of lengths of objects in argv list. */
struct redisCommand *cmd, *lastcmd; /* Last command executed. */
struct redisCommand *iolookedcmd; /* Command looked up in IO threads. */
struct redisCommand *realcmd; /* The original command that was executed by the client,
Used to update error stats in case the c->cmd was modified
during the command invocation (like on GEOADD for example). */
@ -1218,6 +1301,7 @@ typedef struct client {
char *slave_addr; /* Optionally given by REPLCONF ip-address */
int slave_capa; /* Slave capabilities: SLAVE_CAPA_* bitwise OR. */
int slave_req; /* Slave requirements: SLAVE_REQ_* */
uint64_t main_ch_client_id; /* The client id of this replica's main channel */
multiState mstate; /* MULTI/EXEC state */
blockingState bstate; /* blocking state */
long long woff; /* Last write global replication offset. */
@ -1228,8 +1312,8 @@ typedef struct client {
sds peerid; /* Cached peer ID. */
sds sockname; /* Cached connection target address. */
listNode *client_list_node; /* list node in client list */
listNode *io_thread_client_list_node; /* list node in io thread client list */
listNode *postponed_list_node; /* list node within the postponed list */
listNode *pending_read_list_node; /* list node in clients pending read list */
void *module_blocked_client; /* Pointer to the RedisModuleBlockedClient associated with this
* client. This is set in case of module authentication before the
* unblocked client is reprocessed to handle reply callbacks. */
@ -1282,6 +1366,20 @@ typedef struct client {
#endif
} client;
typedef struct __attribute__((aligned(CACHE_LINE_SIZE))) {
uint8_t id; /* The unique ID assigned, if IO_THREADS_MAX_NUM is more
* than 256, we should also promote the data type. */
pthread_t tid; /* Pthread ID */
redisAtomic int paused; /* Paused status for the io thread. */
aeEventLoop *el; /* Main event loop of io thread. */
list *pending_clients; /* List of clients with pending writes. */
list *processing_clients; /* List of clients being processed. */
eventNotifier *pending_clients_notifier; /* Used to wake up the loop when write should be performed. */
pthread_mutex_t pending_clients_mutex; /* Mutex for pending write list */
list *pending_clients_to_main_thread; /* Clients that are waiting to be executed by the main thread. */
list *clients; /* IO thread managed clients. */
} IOThread;
/* ACL information */
typedef struct aclInfo {
long long user_auth_failures; /* Auth failure counts on user level */
@ -1404,8 +1502,9 @@ struct redisMemOverhead {
size_t clients_normal;
size_t cluster_links;
size_t aof_buffer;
size_t lua_caches;
size_t eval_caches;
size_t functions_caches;
size_t script_vm;
size_t overhead_total;
size_t dataset;
size_t total_keys;
@ -1569,6 +1668,7 @@ struct redisServer {
int errors_enabled; /* If true, errorstats is enabled, and we will add new errors. */
unsigned int lruclock; /* Clock for LRU eviction */
volatile sig_atomic_t shutdown_asap; /* Shutdown ordered by signal handler. */
volatile sig_atomic_t crashing; /* Server is crashing report. */
mstime_t shutdown_mstime; /* Timestamp to limit graceful shutdown. */
int last_sig_received; /* Indicates the last SIGNAL received, if any (e.g., SIGINT or SIGTERM). */
int shutdown_flags; /* Flags passed to prepareForShutdown(). */
@ -1592,7 +1692,7 @@ struct redisServer {
dict *moduleapi; /* Exported core APIs dictionary for modules. */
dict *sharedapi; /* Like moduleapi but containing the APIs that
modules share with each other. */
dict *module_configs_queue; /* Dict that stores module configurations from .conf file until after modules are loaded during startup or arguments to loadex. */
dict *module_configs_queue; /* Unmapped configs are queued here, assumed to be module config. Applied after modules are loaded during startup or arguments to loadex. */
list *loadmodule_queue; /* List of modules to load at startup. */
int module_pipe[2]; /* Pipe used to awake the event loop by module threads. */
pid_t child_pid; /* PID of current child */
@ -1639,6 +1739,7 @@ struct redisServer {
redisAtomic uint64_t next_client_id; /* Next client unique ID. Incremental. */
int protected_mode; /* Don't accept external connections. */
int io_threads_num; /* Number of IO threads to use. */
int io_threads_clients_num[IO_THREADS_MAX_NUM]; /* Number of clients assigned to each IO thread. */
int io_threads_do_reads; /* Read and parse from IO threads? */
int io_threads_active; /* Is IO threads currently active? */
long long events_processed_while_blocked; /* processEventsWhileBlocked() */
@ -1711,10 +1812,8 @@ struct redisServer {
long long stat_unexpected_error_replies; /* Number of unexpected (aof-loading, replica to master, etc.) error replies */
long long stat_total_error_replies; /* Total number of issued error replies ( command + rejected errors ) */
long long stat_dump_payload_sanitizations; /* Number deep dump payloads integrity validations. */
long long stat_io_reads_processed; /* Number of read events processed by IO / Main threads */
long long stat_io_writes_processed; /* Number of write events processed by IO / Main threads */
redisAtomic long long stat_total_reads_processed; /* Total number of read events processed */
redisAtomic long long stat_total_writes_processed; /* Total number of write events processed */
redisAtomic long long stat_io_reads_processed[IO_THREADS_MAX_NUM]; /* Number of read events processed by IO / Main threads */
redisAtomic long long stat_io_writes_processed[IO_THREADS_MAX_NUM]; /* Number of write events processed by IO / Main threads */
redisAtomic long long stat_client_qbuf_limit_disconnections; /* Total number of clients reached query buf length limit */
long long stat_client_outbuf_limit_disconnections; /* Total number of clients reached output buf length limit */
/* The following two are used to track instantaneous metrics, like
@ -1881,6 +1980,8 @@ struct redisServer {
int repl_ping_slave_period; /* Master pings the slave every N seconds */
replBacklog *repl_backlog; /* Replication backlog for partial syncs */
long long repl_backlog_size; /* Backlog circular buffer size */
long long repl_full_sync_buffer_limit; /* Accumulated repl data limit during rdb channel replication */
replDataBuf repl_full_sync_buffer; /* Accumulated replication data for rdb channel replication */
time_t repl_backlog_time_limit; /* Time without slaves after the backlog
gets released. */
time_t repl_no_slaves_since; /* We have no slaves since that time.
@ -1894,6 +1995,9 @@ struct redisServer {
int repl_diskless_sync_delay; /* Delay to start a diskless repl BGSAVE. */
int repl_diskless_sync_max_replicas;/* Max replicas for diskless repl BGSAVE
* delay (start sooner if they all connect). */
int repl_rdb_channel; /* Config used to determine if the replica should
* use rdb channel replication for full syncs. */
int repl_debug_pause; /* Debug config to force the main process to pause. */
size_t repl_buffer_mem; /* The memory of replication buffer. */
list *repl_buffer_blocks; /* Replication buffers blocks list
* (serving replica clients and repl backlog) */
@ -1907,10 +2011,13 @@ struct redisServer {
client *cached_master; /* Cached master to be reused for PSYNC. */
int repl_syncio_timeout; /* Timeout for synchronous I/O calls */
int repl_state; /* Replication status if the instance is a slave */
int repl_rdb_ch_state; /* State of the replica's rdb channel during rdb channel replication */
uint64_t repl_main_ch_client_id; /* Main channel client id received in +RDBCHANNELSYNC reply. */
off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
off_t repl_transfer_read; /* Amount of RDB read from master during sync. */
off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */
connection *repl_transfer_s; /* Slave -> Master SYNC connection */
connection *repl_rdb_transfer_s; /* Slave -> Master FULL SYNC connection (RDB download) */
int repl_transfer_fd; /* Slave -> Master SYNC temp file descriptor */
char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */
time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */
@ -2462,11 +2569,6 @@ typedef struct {
#define OBJ_HASH_KEY 1
#define OBJ_HASH_VALUE 2
#define IO_THREADS_OP_IDLE 0
#define IO_THREADS_OP_READ 1
#define IO_THREADS_OP_WRITE 2
extern int io_threads_op;
/* Hash-field data type (of t_hash.c) */
typedef mstr hfield;
extern mstrKind mstrFieldKind;
@ -2513,7 +2615,7 @@ void moduleInitModulesSystem(void);
void moduleInitModulesSystemLast(void);
void modulesCron(void);
int moduleLoad(const char *path, void **argv, int argc, int is_loadex);
int moduleUnload(sds name, const char **errmsg);
int moduleUnload(sds name, const char **errmsg, int forced_unload);
void moduleLoadFromQueue(void);
int moduleGetCommandKeysViaAPI(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
int moduleGetCommandChannelsViaAPI(struct redisCommand *cmd, robj **argv, int argc, getKeysResult *result);
@ -2681,9 +2783,6 @@ void whileBlockedCron(void);
void blockingOperationStarts(void);
void blockingOperationEnds(void);
int handleClientsWithPendingWrites(void);
int handleClientsWithPendingWritesUsingThreads(void);
int handleClientsWithPendingReadsUsingThreads(void);
int stopThreadedIOIfNeeded(void);
int clientHasPendingReplies(client *c);
int updateClientMemUsageAndBucket(client *c);
void removeClientFromMemUsageBucket(client *c, int allow_eviction);
@ -2692,13 +2791,32 @@ int writeToClient(client *c, int handler_installed);
void linkClient(client *c);
void protectClient(client *c);
void unprotectClient(client *c);
void initThreadedIO(void);
client *lookupClientByID(uint64_t id);
int authRequired(client *c);
void putClientInPendingWriteQueue(client *c);
/* reply macros */
#define ADD_REPLY_BULK_CBUFFER_STRING_CONSTANT(c, str) addReplyBulkCBuffer(c, str, strlen(str))
/* iothread.c - the threaded io implementation */
void initThreadedIO(void);
void killIOThreads(void);
void pauseIOThread(int id);
void resumeIOThread(int id);
void pauseAllIOThreads(void);
void resumeAllIOThreads(void);
void pauseIOThreadsRange(int start, int end);
void resumeIOThreadsRange(int start, int end);
int resizeAllIOThreadsEventLoops(size_t newsize);
int sendPendingClientsToIOThreads(void);
void enqueuePendingClientsToMainThread(client *c, int unbind);
void putInPendingClienstForIOThreads(client *c);
void handleClientReadError(client *c);
void unbindClientFromIOThreadEventLoop(client *c);
void processClientsOfAllIOThreads(void);
void assignClientToIOThread(client *c);
void fetchClientFromIOThread(client *c);
int isClientMustHandledByMainThread(client *c);
/* logreqres.c - logging of requests and responses */
void reqresReset(client *c, int free_buf);
void reqresSaveClientReplyOffset(client *c);
@ -2799,6 +2917,7 @@ int isSdsRepresentableAsLongLong(sds s, long long *llval);
int isObjectRepresentableAsLongLong(robj *o, long long *llongval);
robj *tryObjectEncoding(robj *o);
robj *tryObjectEncodingEx(robj *o, int try_trim);
size_t getObjectLength(robj *o);
robj *getDecodedObject(robj *o);
size_t stringObjectLen(robj *o);
robj *createStringObjectFromLongLong(long long value);
@ -2881,6 +3000,8 @@ void clearFailoverState(void);
void updateFailoverStatus(void);
void abortFailover(const char *err);
const char *getFailoverStateString(void);
int replicationCheckHasMainChannel(client *slave);
unsigned long replicationLogicalReplicaCount(void);
/* Generic persistence functions */
void startLoadingFile(size_t size, char* filename, int rdbflags);
@ -3242,7 +3363,8 @@ void hashTypeCurrentObject(hashTypeIterator *hi, int what, unsigned char **vstr,
unsigned int *vlen, long long *vll, uint64_t *expireTime);
sds hashTypeCurrentObjectNewSds(hashTypeIterator *hi, int what);
hfield hashTypeCurrentObjectNewHfield(hashTypeIterator *hi);
robj *hashTypeGetValueObject(redisDb *db, robj *o, sds field, int hfeFlags, int *isHashDeleted);
int hashTypeGetValueObject(redisDb *db, robj *o, sds field, int hfeFlags,
robj **val, uint64_t *expireTime, int *isHashDeleted);
int hashTypeSet(redisDb *db, robj *o, sds field, sds value, int flags);
robj *hashTypeDup(robj *o, sds newkey, uint64_t *minHashExpire);
uint64_t hashTypeRemoveFromExpires(ebuckets *hexpires, robj *o);
@ -3347,10 +3469,10 @@ void freeServerClientMemUsageBuckets(void);
typedef struct ModuleConfig ModuleConfig;
int performModuleConfigSetFromName(sds name, sds value, const char **err);
int performModuleConfigSetDefaultFromName(sds name, const char **err);
void addModuleBoolConfig(const char *module_name, const char *name, int flags, void *privdata, int default_val);
void addModuleStringConfig(const char *module_name, const char *name, int flags, void *privdata, sds default_val);
void addModuleEnumConfig(const char *module_name, const char *name, int flags, void *privdata, int default_val, configEnum *enum_vals);
void addModuleNumericConfig(const char *module_name, const char *name, int flags, void *privdata, long long default_val, int conf_flags, long long lower, long long upper);
void addModuleBoolConfig(sds name, sds alias, int flags, void *privdata, int default_val);
void addModuleStringConfig(sds name, sds alias, int flags, void *privdata, sds default_val);
void addModuleEnumConfig(sds name, sds alias, int flags, void *privdata, int default_val, configEnum *enum_vals, int num_enum_vals);
void addModuleNumericConfig(sds name, sds alias, int flags, void *privdata, long long default_val, int conf_flags, long long lower, long long upper);
void addModuleConfigApply(list *module_configs, ModuleConfig *module_config);
int moduleConfigApplyConfig(list *module_configs, const char **err, const char **err_arg_name);
int getModuleBoolConfig(ModuleConfig *module_config);
@ -3363,6 +3485,7 @@ long long getModuleNumericConfig(ModuleConfig *module_config);
int setModuleNumericConfig(ModuleConfig *config, long long val, const char **err);
/* db.c -- Keyspace access API */
void updateKeysizesHist(redisDb *db, int didx, uint32_t type, uint64_t oldLen, uint64_t newLen);
int removeExpire(redisDb *db, robj *key);
void deleteExpiredKeyAndPropagate(redisDb *db, robj *keyobj);
void deleteEvictedKeyAndPropagate(redisDb *db, robj *keyobj, long long *key_mem_freed);
@ -3511,9 +3634,9 @@ int ldbIsEnabled(void);
void ldbLog(sds entry);
void ldbLogRedisReply(char *reply);
void sha1hex(char *digest, char *script, size_t len);
unsigned long evalMemory(void);
unsigned long evalScriptsMemoryVM(void);
dict* evalScriptsDict(void);
unsigned long evalScriptsMemory(void);
unsigned long evalScriptsMemoryEngine(void);
uint64_t evalGetCommandFlags(client *c, uint64_t orig_flags);
uint64_t fcallGetCommandFlags(client *c, uint64_t orig_flags);
int isInsideYieldingLongCommand(void);
@ -3775,6 +3898,7 @@ void configGetCommand(client *c);
void configResetStatCommand(client *c);
void configRewriteCommand(client *c);
void configHelpCommand(client *c);
int configExists(const sds name);
void hincrbyCommand(client *c);
void hincrbyfloatCommand(client *c);
void subscribeCommand(client *c);
@ -3898,11 +4022,11 @@ void xorDigest(unsigned char *digest, const void *ptr, size_t len);
sds catSubCommandFullname(const char *parent_name, const char *sub_name);
void commandAddSubcommand(struct redisCommand *parent, struct redisCommand *subcommand, const char *declared_name);
void debugDelay(int usec);
void killIOThreads(void);
void killThreads(void);
void makeThreadKillable(void);
void swapMainDbWithTempDb(redisDb *tempDb);
sds getVersion(void);
void debugPauseProcess(void);
/* Use macro for checking log level to avoid evaluating arguments in cases log
* should be ignored due to low level. */

View File

@ -53,11 +53,12 @@ static ConnectionType CT_Socket;
* be embedded in different structs, not just client.
*/
static connection *connCreateSocket(void) {
static connection *connCreateSocket(struct aeEventLoop *el) {
connection *conn = zcalloc(sizeof(connection));
conn->type = &CT_Socket;
conn->fd = -1;
conn->iovcnt = IOV_MAX;
conn->el = el;
return conn;
}
@ -72,9 +73,9 @@ static connection *connCreateSocket(void) {
* is not in an error state (which is not possible for a socket connection,
* but could but possible with other protocols).
*/
static connection *connCreateAcceptedSocket(int fd, void *priv) {
static connection *connCreateAcceptedSocket(struct aeEventLoop *el, int fd, void *priv) {
UNUSED(priv);
connection *conn = connCreateSocket();
connection *conn = connCreateSocket(el);
conn->fd = fd;
conn->state = CONN_STATE_ACCEPTING;
return conn;
@ -93,7 +94,7 @@ static int connSocketConnect(connection *conn, const char *addr, int port, const
conn->state = CONN_STATE_CONNECTING;
conn->conn_handler = connect_handler;
aeCreateFileEvent(server.el, conn->fd, AE_WRITABLE,
aeCreateFileEvent(conn->el, conn->fd, AE_WRITABLE,
conn->type->ae_handler, conn);
return C_OK;
@ -114,7 +115,7 @@ static void connSocketShutdown(connection *conn) {
/* Close the connection and free resources. */
static void connSocketClose(connection *conn) {
if (conn->fd != -1) {
aeDeleteFileEvent(server.el,conn->fd, AE_READABLE | AE_WRITABLE);
if (conn->el) aeDeleteFileEvent(conn->el, conn->fd, AE_READABLE | AE_WRITABLE);
close(conn->fd);
conn->fd = -1;
}
@ -190,6 +191,15 @@ static int connSocketAccept(connection *conn, ConnectionCallbackFunc accept_hand
return ret;
}
/* Rebind the connection to another event loop, read/write handlers must not
* be installed in the current event loop, otherwise it will cause two event
* loops to manage the same connection at the same time. */
static int connSocketRebindEventLoop(connection *conn, aeEventLoop *el) {
serverAssert(!conn->el && !conn->read_handler && !conn->write_handler);
conn->el = el;
return C_OK;
}
/* Register a write handler, to be called when the connection is writable.
* If NULL, the existing handler is removed.
*
@ -207,9 +217,9 @@ static int connSocketSetWriteHandler(connection *conn, ConnectionCallbackFunc fu
else
conn->flags &= ~CONN_FLAG_WRITE_BARRIER;
if (!conn->write_handler)
aeDeleteFileEvent(server.el,conn->fd,AE_WRITABLE);
aeDeleteFileEvent(conn->el,conn->fd,AE_WRITABLE);
else
if (aeCreateFileEvent(server.el,conn->fd,AE_WRITABLE,
if (aeCreateFileEvent(conn->el,conn->fd,AE_WRITABLE,
conn->type->ae_handler,conn) == AE_ERR) return C_ERR;
return C_OK;
}
@ -222,9 +232,9 @@ static int connSocketSetReadHandler(connection *conn, ConnectionCallbackFunc fun
conn->read_handler = func;
if (!conn->read_handler)
aeDeleteFileEvent(server.el,conn->fd,AE_READABLE);
aeDeleteFileEvent(conn->el,conn->fd,AE_READABLE);
else
if (aeCreateFileEvent(server.el,conn->fd,
if (aeCreateFileEvent(conn->el,conn->fd,
AE_READABLE,conn->type->ae_handler,conn) == AE_ERR) return C_ERR;
return C_OK;
}
@ -250,7 +260,7 @@ static void connSocketEventHandler(struct aeEventLoop *el, int fd, void *clientD
conn->state = CONN_STATE_CONNECTED;
}
if (!conn->write_handler) aeDeleteFileEvent(server.el,conn->fd,AE_WRITABLE);
if (!conn->write_handler) aeDeleteFileEvent(conn->el, conn->fd, AE_WRITABLE);
if (!callHandler(conn, conn->conn_handler)) return;
conn->conn_handler = NULL;
@ -291,7 +301,6 @@ static void connSocketAcceptHandler(aeEventLoop *el, int fd, void *privdata, int
int cport, cfd;
int max = server.max_new_conns_per_cycle;
char cip[NET_IP_STR_LEN];
UNUSED(el);
UNUSED(mask);
UNUSED(privdata);
@ -304,7 +313,7 @@ static void connSocketAcceptHandler(aeEventLoop *el, int fd, void *privdata, int
return;
}
serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
acceptCommonHandler(connCreateAcceptedSocket(cfd, NULL),0,cip);
acceptCommonHandler(connCreateAcceptedSocket(el,cfd,NULL), 0, cip);
}
}
@ -397,6 +406,10 @@ static ConnectionType CT_Socket = {
.blocking_connect = connSocketBlockingConnect,
.accept = connSocketAccept,
/* event loop */
.unbind_event_loop = NULL,
.rebind_event_loop = connSocketRebindEventLoop,
/* IO */
.write = connSocketWrite,
.writev = connSocketWritev,

View File

@ -41,7 +41,7 @@ redisSortOperation *createSortOperation(int type, robj *pattern) {
robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
char *p, *f, *k;
sds spat, ssub;
robj *keyobj, *fieldobj = NULL, *o;
robj *keyobj, *fieldobj = NULL, *o, *val;
int prefixlen, sublen, postfixlen, fieldlen;
/* If the pattern is "#" return the substitution object itself in order
@ -95,7 +95,8 @@ robj *lookupKeyByPattern(redisDb *db, robj *pattern, robj *subst) {
/* Retrieve value from hash by the field name. The returned object
* is a new object with refcount already incremented. */
int isHashDeleted;
o = hashTypeGetValueObject(db, o, fieldobj->ptr, HFE_LAZY_EXPIRE, &isHashDeleted);
hashTypeGetValueObject(db, o, fieldobj->ptr, HFE_LAZY_EXPIRE, &val, NULL, &isHashDeleted);
o = val;
if (isHashDeleted)
goto noobj;

View File

@ -422,9 +422,14 @@ void listpackExExpire(redisDb *db, robj *o, ExpireInfo *info) {
expired++;
}
if (expired)
if (expired) {
lpt->lp = lpDeleteRange(lpt->lp, 0, expired * 3);
/* update keysizes */
unsigned long l = lpLength(lpt->lp) / 3;
updateKeysizesHist(db, getKeySlot(lpt->key), OBJ_HASH, l + expired, l);
}
min = hashTypeGetMinExpire(o, 1 /*accurate*/);
info->nextExpireTime = min;
}
@ -546,6 +551,11 @@ SetExRes hashTypeSetExpiryListpack(HashTypeSetEx *ex, sds field,
if (unlikely(checkAlreadyExpired(expireAt))) {
propagateHashFieldDeletion(ex->db, ex->key->ptr, field, sdslen(field));
hashTypeDelete(ex->hashObj, field, 1);
/* get listpack length */
listpackEx *lpt = ((listpackEx *) ex->hashObj->ptr);
unsigned long length = lpLength(lpt->lp) / 3;
updateKeysizesHist(ex->db, getKeySlot(ex->key->ptr), OBJ_HASH, length+1, length);
server.stat_expired_subkeys++;
ex->fieldDeleted++;
return HSETEX_DELETED;
@ -706,24 +716,28 @@ GetFieldRes hashTypeGetFromHashTable(robj *o, sds field, sds *value, uint64_t *e
* If *vll is populated *vstr is set to NULL, so the caller can
* always check the function return by checking the return value
* for GETF_OK and checking if vll (or vstr) is NULL.
*
* expiredAt - if the field has an expiration time, it will be set to the expiration
* time of the field. Otherwise, will be set to EB_EXPIRE_TIME_INVALID.
*/
GetFieldRes hashTypeGetValue(redisDb *db, robj *o, sds field, unsigned char **vstr,
unsigned int *vlen, long long *vll, int hfeFlags) {
uint64_t expiredAt;
unsigned int *vlen, long long *vll,
int hfeFlags, uint64_t *expiredAt)
{
sds key;
GetFieldRes res;
uint64_t dummy;
if (expiredAt == NULL) expiredAt = &dummy;
if (o->encoding == OBJ_ENCODING_LISTPACK ||
o->encoding == OBJ_ENCODING_LISTPACK_EX) {
*vstr = NULL;
res = hashTypeGetFromListpack(o, field, vstr, vlen, vll, &expiredAt);
res = hashTypeGetFromListpack(o, field, vstr, vlen, vll, expiredAt);
if (res == GETF_NOT_FOUND)
return GETF_NOT_FOUND;
} else if (o->encoding == OBJ_ENCODING_HT) {
sds value = NULL;
res = hashTypeGetFromHashTable(o, field, &value, &expiredAt);
res = hashTypeGetFromHashTable(o, field, &value, expiredAt);
if (res == GETF_NOT_FOUND)
return GETF_NOT_FOUND;
@ -734,7 +748,8 @@ GetFieldRes hashTypeGetValue(redisDb *db, robj *o, sds field, unsigned char **vs
serverPanic("Unknown hash encoding");
}
if ((expiredAt >= (uint64_t) commandTimeSnapshot()) || (hfeFlags & HFE_LAZY_ACCESS_EXPIRED))
if ((*expiredAt >= (uint64_t) commandTimeSnapshot()) ||
(hfeFlags & HFE_LAZY_ACCESS_EXPIRED))
return GETF_OK;
if (server.masterhost) {
@ -787,29 +802,46 @@ GetFieldRes hashTypeGetValue(redisDb *db, robj *o, sds field, unsigned char **vs
* isHashDeleted - If attempted to access expired field and it's the last field
* in the hash, then the hash will as well be deleted. In this case,
* isHashDeleted will be set to 1.
* val - If the field is found, then val will be set to the value object.
* expireTime - If the field exists (`GETF_OK`) then expireTime will be set to
* the expiration time of the field. Otherwise, it will be set to 0.
*
* Returns 1 if the field exists, and 0 when it doesn't.
*/
robj *hashTypeGetValueObject(redisDb *db, robj *o, sds field, int hfeFlags, int *isHashDeleted) {
int hashTypeGetValueObject(redisDb *db, robj *o, sds field, int hfeFlags,
robj **val, uint64_t *expireTime, int *isHashDeleted) {
unsigned char *vstr;
unsigned int vlen;
long long vll;
if (isHashDeleted) *isHashDeleted = 0;
GetFieldRes res = hashTypeGetValue(db,o,field,&vstr,&vlen,&vll, hfeFlags);
if (val) *val = NULL;
GetFieldRes res = hashTypeGetValue(db,o,field,&vstr,&vlen,&vll,
hfeFlags, expireTime);
if (res == GETF_OK) {
if (vstr) return createStringObject((char*)vstr,vlen);
else return createStringObjectFromLongLong(vll);
/* expireTime set to 0 if the field has no expiration time */
if (expireTime && (*expireTime == EB_EXPIRE_TIME_INVALID))
*expireTime = 0;
/* If expected to return the value, then create a new object */
if (val) {
if (vstr) *val = createStringObject((char *) vstr, vlen);
else *val = createStringObjectFromLongLong(vll);
}
return 1;
}
if ((res == GETF_EXPIRED_HASH) && (isHashDeleted))
*isHashDeleted = 1;
/* GETF_EXPIRED_HASH, GETF_EXPIRED, GETF_NOT_FOUND */
return NULL;
return 0;
}
/* Test if the specified field exists in the given hash. If the field is
* expired (HFE), then it will be lazy deleted
* expired (HFE), then it will be lazy deleted unless HFE_LAZY_AVOID_FIELD_DEL
* hfeFlags is set.
*
* hfeFlags - Lookup HFE_LAZY_* flags
* isHashDeleted - If attempted to access expired field and it is the last field
@ -823,7 +855,8 @@ int hashTypeExists(redisDb *db, robj *o, sds field, int hfeFlags, int *isHashDel
unsigned int vlen = UINT_MAX;
long long vll = LLONG_MAX;
GetFieldRes res = hashTypeGetValue(db, o, field, &vstr, &vlen, &vll, hfeFlags);
GetFieldRes res = hashTypeGetValue(db, o, field, &vstr, &vlen, &vll,
hfeFlags, NULL);
if (isHashDeleted)
*isHashDeleted = (res == GETF_EXPIRED_HASH) ? 1 : 0;
return (res == GETF_OK) ? 1 : 0;
@ -933,27 +966,25 @@ int hashTypeSet(redisDb *db, robj *o, sds field, sds value, int flags) {
hashTypeConvert(o, OBJ_ENCODING_HT, &db->hexpires);
} else if (o->encoding == OBJ_ENCODING_HT) {
hfield newField = hfieldNew(field, sdslen(field), 0);
dict *ht = o->ptr;
dictEntry *de, *existing;
/* stored key is different than lookup key */
const uint64_t hash = dictGetHash(ht,field);
/* check if field already exists */
existing = dictFindByHash(ht, field, hash);
/* check if field already exists */
if (existing == NULL) {
hfield newField = hfieldNew(field, sdslen(field), 0);
dictUseStoredKeyApi(ht, 1);
de = dictAddRaw(ht, newField, &existing);
de = dictAddNonExistsByHash(ht, newField, hash);
dictUseStoredKeyApi(ht, 0);
/* If field already exists, then update "field". "Value" will be set afterward */
if (de == NULL) {
if (flags & HASH_SET_KEEP_TTL) {
/* keep old field along with TTL */
hfieldFree(newField);
} else {
/* If attached TTL to the old field, then remove it from hash's private ebuckets */
/* If attached TTL to the old field, then remove it from hash's
* private ebuckets when HASH_SET_KEEP_TTL is not set. */
if (!(flags & HASH_SET_KEEP_TTL)) {
hfield oldField = dictGetKey(existing);
hfieldPersist(o, oldField);
hfieldFree(oldField);
dictSetKey(ht, existing, newField);
}
/* Free the old value */
sdsfree(dictGetVal(existing));
update = 1;
de = existing;
@ -1042,6 +1073,8 @@ SetExRes hashTypeSetExpiryHT(HashTypeSetEx *exInfo, sds field, uint64_t expireAt
/* If expired, then delete the field and propagate the deletion.
* If replica, continue like the field is valid */
if (unlikely(checkAlreadyExpired(expireAt))) {
unsigned long length = dictSize(ht);
updateKeysizesHist(exInfo->db, getKeySlot(exInfo->key->ptr), OBJ_HASH, length, length-1);
/* replicas should not initiate deletion of fields */
propagateHashFieldDeletion(exInfo->db, exInfo->key->ptr, field, sdslen(field));
hashTypeDelete(exInfo->hashObj, field, 1);
@ -2132,6 +2165,7 @@ ebuckets *hashTypeGetDictMetaHFE(dict *d) {
*----------------------------------------------------------------------------*/
void hsetnxCommand(client *c) {
unsigned long hlen;
int isHashDeleted;
robj *o;
if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
@ -2152,6 +2186,8 @@ void hsetnxCommand(client *c) {
addReply(c, shared.cone);
signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id);
hlen = hashTypeLength(o, 0);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_HASH, hlen - 1, hlen);
server.dirty++;
}
@ -2180,6 +2216,8 @@ void hsetCommand(client *c) {
addReply(c, shared.ok);
}
signalModifiedKey(c,c->db,c->argv[1]);
unsigned long l = hashTypeLength(o, 0);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_HASH, l - created, l);
notifyKeyspaceEvent(NOTIFY_HASH,"hset",c->argv[1],c->db->id);
server.dirty += (c->argc - 2)/2;
}
@ -2195,7 +2233,7 @@ void hincrbyCommand(client *c) {
if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
GetFieldRes res = hashTypeGetValue(c->db,o,c->argv[2]->ptr,&vstr,&vlen,&value,
HFE_LAZY_EXPIRE);
HFE_LAZY_EXPIRE, NULL);
if (res == GETF_OK) {
if (vstr) {
if (string2ll((char*)vstr,vlen,&value) == 0) {
@ -2205,11 +2243,14 @@ void hincrbyCommand(client *c) {
} /* Else hashTypeGetValue() already stored it into &value */
} else if ((res == GETF_NOT_FOUND) || (res == GETF_EXPIRED)) {
value = 0;
unsigned long l = hashTypeLength(o, 0);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_HASH, l, l + 1);
} else {
/* Field expired and in turn hash deleted. Create new one! */
o = createHashObject();
dbAdd(c->db,c->argv[1],o);
value = 0;
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_HASH, 0, 1);
}
oldvalue = value;
@ -2242,7 +2283,7 @@ void hincrbyfloatCommand(client *c) {
}
if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return;
GetFieldRes res = hashTypeGetValue(c->db, o,c->argv[2]->ptr,&vstr,&vlen,&ll,
HFE_LAZY_EXPIRE);
HFE_LAZY_EXPIRE, NULL);
if (res == GETF_OK) {
if (vstr) {
if (string2ld((char*)vstr,vlen,&value) == 0) {
@ -2254,11 +2295,14 @@ void hincrbyfloatCommand(client *c) {
}
} else if ((res == GETF_NOT_FOUND) || (res == GETF_EXPIRED)) {
value = 0;
unsigned long l = hashTypeLength(o, 0);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_HASH, l, l + 1);
} else {
/* Field expired and in turn hash deleted. Create new one! */
o = createHashObject();
dbAdd(c->db,c->argv[1],o);
value = 0;
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_HASH, 0, 1);
}
value += incr;
@ -2296,7 +2340,7 @@ static GetFieldRes addHashFieldToReply(client *c, robj *o, sds field, int hfeFla
unsigned int vlen = UINT_MAX;
long long vll = LLONG_MAX;
GetFieldRes res = hashTypeGetValue(c->db, o, field, &vstr, &vlen, &vll, hfeFlags);
GetFieldRes res = hashTypeGetValue(c->db, o, field, &vstr, &vlen, &vll, hfeFlags, NULL);
if (res == GETF_OK) {
if (vstr) {
addReplyBulkCBuffer(c, vstr, vlen);
@ -2356,6 +2400,8 @@ void hdelCommand(client *c) {
if ((o = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
checkType(c,o,OBJ_HASH)) return;
unsigned long oldLen = hashTypeLength(o, 0);
/* Hash field expiration is optimized to avoid frequent update global HFE DS for
* each field deletion. Eventually active-expiration will run and update or remove
* the hash from global HFE DS gracefully. Nevertheless, statistic "subexpiry"
@ -2375,6 +2421,8 @@ void hdelCommand(client *c) {
}
}
if (deleted) {
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_HASH, oldLen, oldLen - deleted);
signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_HASH,"hdel",c->argv[1],c->db->id);
if (keyremoved) {
@ -2407,8 +2455,8 @@ void hstrlenCommand(client *c) {
if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
checkType(c,o,OBJ_HASH)) return;
GetFieldRes res = hashTypeGetValue(c->db, o, c->argv[2]->ptr, &vstr, &vlen, &vll,
HFE_LAZY_EXPIRE);
GetFieldRes res = hashTypeGetValue(c->db, o, c->argv[2]->ptr, &vstr,
&vlen, &vll, HFE_LAZY_EXPIRE, NULL);
if (res == GETF_NOT_FOUND || res == GETF_EXPIRED || res == GETF_EXPIRED_HASH) {
addReply(c, shared.czero);
@ -2943,6 +2991,11 @@ static ExpireAction onFieldExpire(eItem item, void *ctx) {
dict *d = expCtx->hashObj->ptr;
dictExpireMetadata *dictExpireMeta = (dictExpireMetadata *) dictMetadata(d);
propagateHashFieldDeletion(expCtx->db, dictExpireMeta->key, hf, hfieldlen(hf));
/* update keysizes */
unsigned long l = hashTypeLength(expCtx->hashObj, 0);
updateKeysizesHist(expCtx->db, getKeySlot(dictExpireMeta->key), OBJ_HASH, l, l - 1);
serverAssert(hashTypeDelete(expCtx->hashObj, hf, 0) == 1);
server.stat_expired_subkeys++;
return ACT_REMOVE_EXP_ITEM;

View File

@ -7,6 +7,7 @@
*/
#include "server.h"
#include "util.h"
/*-----------------------------------------------------------------------------
* List API
@ -462,6 +463,7 @@ void listTypeDelRange(robj *subject, long start, long count) {
/* Implements LPUSH/RPUSH/LPUSHX/RPUSHX.
* 'xx': push if key exists. */
void pushGenericCommand(client *c, int where, int xx) {
unsigned long llen;
int j;
robj *lobj = lookupKeyWrite(c->db, c->argv[1]);
@ -482,11 +484,13 @@ void pushGenericCommand(client *c, int where, int xx) {
server.dirty++;
}
addReplyLongLong(c, listTypeLength(lobj));
llen = listTypeLength(lobj);
addReplyLongLong(c, llen);
char *event = (where == LIST_HEAD) ? "lpush" : "rpush";
signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_LIST,event,c->argv[1],c->db->id);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_LIST, llen - (c->argc - 2), llen);
}
/* LPUSH <key> <element> [<element> ...] */
@ -553,6 +557,8 @@ void linsertCommand(client *c) {
notifyKeyspaceEvent(NOTIFY_LIST,"linsert",
c->argv[1],c->db->id);
server.dirty++;
unsigned long ll = listTypeLength(subject);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_LIST, ll-1, ll);
} else {
/* Notify client of a failed insert */
addReplyLongLong(c,-1);
@ -677,23 +683,20 @@ void addListQuicklistRangeReply(client *c, robj *o, int from, int rangelen, int
* Note that the purpose is to make the methods small so that the
* code in the loop can be inlined better to improve performance. */
void addListListpackRangeReply(client *c, robj *o, int from, int rangelen, int reverse) {
unsigned char *p = lpSeek(o->ptr, from);
unsigned char *vstr;
unsigned int vlen;
long long lval;
unsigned char *lp = o->ptr;
unsigned char *p = lpSeek(lp, from);
const size_t lpbytes = lpBytes(lp);
int64_t vlen;
/* Return the result in form of a multi-bulk reply */
addReplyArrayLen(c,rangelen);
while(rangelen--) {
serverAssert(p); /* fail on corrupt data */
vstr = lpGetValue(p, &vlen, &lval);
if (vstr) {
unsigned char buf[LP_INTBUF_SIZE];
unsigned char *vstr = lpGet(p,&vlen,buf);
addReplyBulkCBuffer(c,vstr,vlen);
} else {
addReplyBulkLongLong(c,lval);
}
p = reverse ? lpPrev(o->ptr,p) : lpNext(o->ptr,p);
p = reverse ? lpPrev(lp,p) : lpNextWithBytes(lp,p,lpbytes);
}
}
@ -736,9 +739,11 @@ void addListRangeReply(client *c, robj *o, long start, long end, int reverse) {
* if the key got deleted by this function. */
void listElementsRemoved(client *c, robj *key, int where, robj *o, long count, int signal, int *deleted) {
char *event = (where == LIST_HEAD) ? "lpop" : "rpop";
unsigned long llen = listTypeLength(o);
notifyKeyspaceEvent(NOTIFY_LIST, event, key, c->db->id);
if (listTypeLength(o) == 0) {
updateKeysizesHist(c->db, getKeySlot(key->ptr), OBJ_LIST, llen + count, llen);
if (llen == 0) {
if (deleted) *deleted = 1;
dbDelete(c->db, key);
@ -870,7 +875,7 @@ void lrangeCommand(client *c) {
/* LTRIM <key> <start> <stop> */
void ltrimCommand(client *c) {
robj *o;
long start, end, llen, ltrim, rtrim;
long start, end, llen, ltrim, rtrim, llenNew;
if ((getLongFromObjectOrReply(c, c->argv[2], &start, NULL) != C_OK) ||
(getLongFromObjectOrReply(c, c->argv[3], &end, NULL) != C_OK)) return;
@ -908,12 +913,13 @@ void ltrimCommand(client *c) {
}
notifyKeyspaceEvent(NOTIFY_LIST,"ltrim",c->argv[1],c->db->id);
if (listTypeLength(o) == 0) {
if ((llenNew = listTypeLength(o)) == 0) {
dbDelete(c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
} else {
listTypeTryConversion(o,LIST_CONV_SHRINKING,NULL,NULL);
}
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_LIST, llen, llenNew);
signalModifiedKey(c,c->db,c->argv[1]);
server.dirty += (ltrim + rtrim);
addReply(c,shared.ok);
@ -1066,8 +1072,11 @@ void lremCommand(client *c) {
listTypeReleaseIterator(li);
if (removed) {
long ll = listTypeLength(subject);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_LIST, ll + removed, ll);
notifyKeyspaceEvent(NOTIFY_LIST,"lrem",c->argv[1],c->db->id);
if (listTypeLength(subject) == 0) {
if (ll == 0) {
dbDelete(c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
} else {
@ -1089,6 +1098,10 @@ void lmoveHandlePush(client *c, robj *dstkey, robj *dstobj, robj *value,
listTypeTryConversionAppend(dstobj,&value,0,0,NULL,NULL);
listTypePush(dstobj,value,where);
signalModifiedKey(c,c->db,dstkey);
long ll = listTypeLength(dstobj);
updateKeysizesHist(c->db, getKeySlot(dstkey->ptr), OBJ_LIST, ll - 1, ll);
notifyKeyspaceEvent(NOTIFY_LIST,
where == LIST_HEAD ? "lpush" : "rpush",
dstkey,

View File

@ -603,6 +603,8 @@ void saddCommand(client *c) {
if (setTypeAdd(set,c->argv[j]->ptr)) added++;
}
if (added) {
unsigned long size = setTypeSize(set);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_SET, size - added, size);
signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_SET,"sadd",c->argv[1],c->db->id);
}
@ -617,6 +619,8 @@ void sremCommand(client *c) {
if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.czero)) == NULL ||
checkType(c,set,OBJ_SET)) return;
unsigned long oldSize = setTypeSize(set);
for (j = 2; j < c->argc; j++) {
if (setTypeRemove(set,c->argv[j]->ptr)) {
deleted++;
@ -628,6 +632,8 @@ void sremCommand(client *c) {
}
}
if (deleted) {
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_SET, oldSize, oldSize - deleted);
signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_SET,"srem",c->argv[1],c->db->id);
if (keyremoved)
@ -669,8 +675,12 @@ void smoveCommand(client *c) {
}
notifyKeyspaceEvent(NOTIFY_SET,"srem",c->argv[1],c->db->id);
/* Update keysizes histogram */
unsigned long srcLen = setTypeSize(srcset);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_SET, srcLen + 1, srcLen);
/* Remove the src set from the database when empty */
if (setTypeSize(srcset) == 0) {
if (srcLen == 0) {
dbDelete(c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
}
@ -686,6 +696,8 @@ void smoveCommand(client *c) {
/* An extra key has changed when ele was successfully added to dstset */
if (setTypeAdd(dstset,ele->ptr)) {
unsigned long dstLen = setTypeSize(dstset);
updateKeysizesHist(c->db, getKeySlot(c->argv[2]->ptr), OBJ_SET, dstLen - 1, dstLen);
server.dirty++;
signalModifiedKey(c,c->db,c->argv[2]);
notifyKeyspaceEvent(NOTIFY_SET,"sadd",c->argv[2],c->db->id);
@ -743,7 +755,7 @@ void scardCommand(client *c) {
void spopWithCountCommand(client *c) {
long l;
unsigned long count, size;
unsigned long count, size, toRemove;
robj *set;
/* Get the count argument */
@ -763,10 +775,12 @@ void spopWithCountCommand(client *c) {
}
size = setTypeSize(set);
toRemove = (count >= size) ? size : count;
/* Generate an SPOP keyspace notification */
notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id);
server.dirty += (count >= size) ? size : count;
server.dirty += toRemove;
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_SET, size, size - toRemove);
/* CASE 1:
* The number of requested elements is greater than or equal to
@ -949,6 +963,7 @@ void spopWithCountCommand(client *c) {
}
void spopCommand(client *c) {
unsigned long size;
robj *set, *ele;
if (c->argc == 3) {
@ -964,6 +979,9 @@ void spopCommand(client *c) {
if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.null[c->resp]))
== NULL || checkType(c,set,OBJ_SET)) return;
size = setTypeSize(set);
updateKeysizesHist(c->db, getKeySlot(c->argv[1]->ptr), OBJ_SET, size, size-1);
/* Pop a random element from the set */
ele = setTypePopRandom(set);

View File

@ -33,6 +33,7 @@
#define STREAM_LISTPACK_MAX_SIZE (1<<30)
void streamFreeCG(streamCG *cg);
void streamFreeCGGeneric(void *cg);
void streamFreeNACK(streamNACK *na);
size_t streamReplyWithRangeFromConsumerPEL(client *c, stream *s, streamID *start, streamID *end, size_t count, streamConsumer *consumer);
int streamParseStrictIDOrReply(client *c, robj *o, streamID *id, uint64_t missing_seq, int *seq_given);
@ -60,9 +61,9 @@ stream *streamNew(void) {
/* Free a stream, including the listpacks stored inside the radix tree. */
void freeStream(stream *s) {
raxFreeWithCallback(s->rax,(void(*)(void*))lpFree);
raxFreeWithCallback(s->rax, lpFreeGeneric);
if (s->cgroups)
raxFreeWithCallback(s->cgroups,(void(*)(void*))streamFreeCG);
raxFreeWithCallback(s->cgroups, streamFreeCGGeneric);
zfree(s);
}
@ -241,7 +242,7 @@ robj *streamDup(robj *o) {
/* This is a wrapper function for lpGet() to directly get an integer value
* from the listpack (that may store numbers as a string), converting
* the string if needed.
* The 'valid" argument is an optional output parameter to get an indication
* The 'valid' argument is an optional output parameter to get an indication
* if the record was valid, when this parameter is NULL, the function will
* fail with an assertion. */
static inline int64_t lpGetIntegerIfValid(unsigned char *ele, int *valid) {
@ -1742,7 +1743,7 @@ size_t streamReplyWithRange(client *c, stream *s, streamID *start, streamID *end
/* Try to add a new NACK. Most of the time this will work and
* will not require extra lookups. We'll fix the problem later
* if we find that there is already a entry for this ID. */
* if we find that there is already an entry for this ID. */
streamNACK *nack = streamCreateNACK(consumer);
int group_inserted =
raxTryInsert(group->pel,buf,sizeof(buf),nack,NULL);
@ -1875,7 +1876,7 @@ robj *streamTypeLookupWriteOrCreate(client *c, robj *key, int no_create) {
* that can be represented. If 'strict' is set to 1, "-" and "+" will be
* treated as an invalid ID.
*
* The ID form <ms>-* specifies a millisconds-only ID, leaving the sequence part
* The ID form <ms>-* specifies a milliseconds-only ID, leaving the sequence part
* to be autogenerated. When a non-NULL 'seq_given' argument is provided, this
* form is accepted and the argument is set to 0 unless the sequence part is
* specified.
@ -2478,6 +2479,11 @@ void streamFreeNACK(streamNACK *na) {
zfree(na);
}
/* Generic version of streamFreeNACK. */
void streamFreeNACKGeneric(void *na) {
streamFreeNACK((streamNACK *)na);
}
/* Free a consumer and associated data structures. Note that this function
* will not reassign the pending messages associated with this consumer
* nor will delete them from the stream, so when this function is called
@ -2490,6 +2496,11 @@ void streamFreeConsumer(streamConsumer *sc) {
zfree(sc);
}
/* Generic version of streamFreeConsumer. */
void streamFreeConsumerGeneric(void *sc) {
streamFreeConsumer((streamConsumer *)sc);
}
/* Create a new consumer group in the context of the stream 's', having the
* specified name, last server ID and reads counter. If a consumer group with
* the same name already exists NULL is returned, otherwise the pointer to the
@ -2510,11 +2521,16 @@ streamCG *streamCreateCG(stream *s, char *name, size_t namelen, streamID *id, lo
/* Free a consumer group and all its associated data. */
void streamFreeCG(streamCG *cg) {
raxFreeWithCallback(cg->pel,(void(*)(void*))streamFreeNACK);
raxFreeWithCallback(cg->consumers,(void(*)(void*))streamFreeConsumer);
raxFreeWithCallback(cg->pel, streamFreeNACKGeneric);
raxFreeWithCallback(cg->consumers, streamFreeConsumerGeneric);
zfree(cg);
}
/* Generic version of streamFreeCG. */
void streamFreeCGGeneric(void *cg) {
streamFreeCG((streamCG *)cg);
}
/* Lookup the consumer group in the specified stream and returns its
* pointer, otherwise if there is no such group, NULL is returned. */
streamCG *streamLookupCG(stream *s, sds groupname) {

View File

@ -21,7 +21,7 @@ static int checkStringLength(client *c, long long size, long long append) {
return C_OK;
/* 'uint64_t' cast is there just to prevent undefined behavior on overflow */
long long total = (uint64_t)size + append;
/* Test configured max-bulk-len represending a limit of the biggest string object,
/* Test configured max-bulk-len representing a limit of the biggest string object,
* and also test for overflow. */
if (total > server.proto_max_bulk_len || total < size || total < append) {
addReplyError(c,"string exceeds maximum allowed size (proto-max-bulk-len)");
@ -61,7 +61,7 @@ static int checkStringLength(client *c, long long size, long long append) {
static int getExpireMillisecondsOrReply(client *c, robj *expire, int flags, int unit, long long *milliseconds);
void setGenericCommand(client *c, int flags, robj *key, robj *val, robj *expire, int unit, robj *ok_reply, robj *abort_reply) {
long long milliseconds = 0; /* initialized to avoid any harmness warning */
long long milliseconds = 0; /* initialized to avoid any harmless warning */
int found = 0;
int setkey_flags = 0;
@ -420,6 +420,7 @@ void getsetCommand(client *c) {
}
void setrangeCommand(client *c) {
size_t oldLen = 0, newLen;
robj *o;
long offset;
sds value = c->argv[3]->ptr;
@ -449,16 +450,14 @@ void setrangeCommand(client *c) {
o = createObject(OBJ_STRING,sdsnewlen(NULL, offset+value_len));
dbAdd(c->db,c->argv[1],o);
} else {
size_t olen;
/* Key exists, check type */
if (checkType(c,o,OBJ_STRING))
return;
/* Return existing string length when setting nothing */
olen = stringObjectLen(o);
oldLen = stringObjectLen(o);
if (value_len == 0) {
addReplyLongLong(c,olen);
addReplyLongLong(c, oldLen);
return;
}
@ -478,7 +477,10 @@ void setrangeCommand(client *c) {
"setrange",c->argv[1],c->db->id);
server.dirty++;
}
addReplyLongLong(c,sdslen(o->ptr));
newLen = sdslen(o->ptr);
updateKeysizesHist(c->db,getKeySlot(c->argv[1]->ptr),OBJ_STRING,oldLen,newLen);
addReplyLongLong(c,newLen);
}
void getrangeCommand(client *c) {
@ -669,7 +671,7 @@ void incrbyfloatCommand(client *c) {
}
void appendCommand(client *c) {
size_t totlen;
size_t totlen, append_len;
robj *o, *append;
dictEntry *de;
@ -679,7 +681,7 @@ void appendCommand(client *c) {
c->argv[2] = tryObjectEncoding(c->argv[2]);
dbAdd(c->db,c->argv[1],c->argv[2]);
incrRefCount(c->argv[2]);
totlen = stringObjectLen(c->argv[2]);
append_len = totlen = stringObjectLen(c->argv[2]);
} else {
/* Key exists, check type */
if (checkType(c,o,OBJ_STRING))
@ -687,7 +689,7 @@ void appendCommand(client *c) {
/* "append" is an argument, so always an sds */
append = c->argv[2];
const size_t append_len = sdslen(append->ptr);
append_len = sdslen(append->ptr);
if (checkStringLength(c,stringObjectLen(o),append_len) != C_OK)
return;
@ -699,6 +701,7 @@ void appendCommand(client *c) {
signalModifiedKey(c,c->db,c->argv[1]);
notifyKeyspaceEvent(NOTIFY_STRING,"append",c->argv[1],c->db->id);
server.dirty++;
updateKeysizesHist(c->db,getKeySlot(c->argv[1]->ptr),OBJ_STRING, totlen - append_len, totlen);
addReplyLongLong(c,totlen);
}

View File

@ -728,7 +728,7 @@ zskiplistNode *zslNthInLexRange(zskiplist *zsl, zlexrangespec *range, long n) {
x = x->level[0].forward;
}
} else {
/* If offset is big, we caasn jump from the last zsl->level-1 node. */
/* If offset is big, we can jump from the last zsl->level-1 node. */
rank_diff = edge_rank + 1 + n - last_highest_level_rank;
x = zslGetElementByRankFromNode(last_highest_level_node, zsl->level - 1, rank_diff);
}
@ -1843,6 +1843,7 @@ void zaddGenericCommand(client *c, int flags) {
zsetTypeMaybeConvert(zobj, elements);
}
unsigned long llen = zsetLength(zobj);
for (j = 0; j < elements; j++) {
double newscore;
score = scores[j];
@ -1860,6 +1861,7 @@ void zaddGenericCommand(client *c, int flags) {
score = newscore;
}
server.dirty += (added+updated);
updateKeysizesHist(c->db, getKeySlot(key->ptr), OBJ_ZSET, llen, llen+added);
reply_to_client:
if (incr) { /* ZINCRBY or INCR option. */
@ -1907,8 +1909,13 @@ void zremCommand(client *c) {
if (deleted) {
notifyKeyspaceEvent(NOTIFY_ZSET,"zrem",key,c->db->id);
if (keyremoved)
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
if (keyremoved) {
notifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, c->db->id);
/* No need updateKeysizesHist(). dbDelete() done it already. */
} else {
unsigned long len = zsetLength(zobj);
updateKeysizesHist(c->db, getKeySlot(key->ptr), OBJ_ZSET, len + deleted, len);
}
signalModifiedKey(c,c->db,key);
server.dirty += deleted;
}
@ -2023,8 +2030,13 @@ void zremrangeGenericCommand(client *c, zrange_type rangetype) {
if (deleted) {
signalModifiedKey(c,c->db,key);
notifyKeyspaceEvent(NOTIFY_ZSET,notify_type,key,c->db->id);
if (keyremoved)
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
if (keyremoved) {
notifyKeyspaceEvent(NOTIFY_GENERIC, "del", key, c->db->id);
/* No need updateKeysizesHist(). dbDelete() done it already. */
} else {
unsigned long len = zsetLength(zobj);
updateKeysizesHist(c->db, getKeySlot(key->ptr), OBJ_ZSET, len + deleted, len);
}
}
server.dirty += deleted;
addReplyLongLong(c,deleted);
@ -4031,6 +4043,9 @@ void genericZpopCommand(client *c, robj **keyv, int keyc, int where, int emitkey
dbDelete(c->db,key);
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
/* No need updateKeysizesHist(). dbDelete() done it already. */
} else {
updateKeysizesHist(c->db, getKeySlot(key->ptr), OBJ_ZSET, llen, llen - result_count);
}
signalModifiedKey(c,c->db,key);

114
src/tls.c
View File

@ -75,10 +75,6 @@ static int parseProtocolsConfig(const char *str) {
return protocols;
}
/* list of connections with pending data already read from the socket, but not
* served to the reader yet. */
static list *pending_list = NULL;
/**
* OpenSSL global initialization and locking handling callbacks.
* Note that this is only required for OpenSSL < 1.1.0.
@ -144,8 +140,6 @@ static void tlsInit(void) {
if (!RAND_poll()) {
serverLog(LL_WARNING, "OpenSSL: Failed to seed random number generator.");
}
pending_list = listCreate();
}
static void tlsCleanup(void) {
@ -435,20 +429,21 @@ typedef struct tls_connection {
listNode *pending_list_node;
} tls_connection;
static connection *createTLSConnection(int client_side) {
static connection *createTLSConnection(struct aeEventLoop *el, int client_side) {
SSL_CTX *ctx = redis_tls_ctx;
if (client_side && redis_tls_client_ctx)
ctx = redis_tls_client_ctx;
tls_connection *conn = zcalloc(sizeof(tls_connection));
conn->c.type = &CT_TLS;
conn->c.fd = -1;
conn->c.el = el;
conn->c.iovcnt = IOV_MAX;
conn->ssl = SSL_new(ctx);
return (connection *) conn;
}
static connection *connCreateTLS(void) {
return createTLSConnection(1);
static connection *connCreateTLS(struct aeEventLoop *el) {
return createTLSConnection(el, 1);
}
/* Fetch the latest OpenSSL error and store it in the connection */
@ -468,10 +463,11 @@ static void updateTLSError(tls_connection *conn) {
* Callers should use connGetState() and verify the created connection
* is not in an error state.
*/
static connection *connCreateAcceptedTLS(int fd, void *priv) {
static connection *connCreateAcceptedTLS(struct aeEventLoop *el, int fd, void *priv) {
int require_auth = *(int *)priv;
tls_connection *conn = (tls_connection *) createTLSConnection(0);
tls_connection *conn = (tls_connection *) createTLSConnection(el, 0);
conn->c.fd = fd;
conn->c.el = el;
conn->c.state = CONN_STATE_ACCEPTING;
if (!conn->ssl) {
@ -575,17 +571,17 @@ static int updateStateAfterSSLIO(tls_connection *conn, int ret_value, int update
}
static void registerSSLEvent(tls_connection *conn, WantIOType want) {
int mask = aeGetFileEvents(server.el, conn->c.fd);
int mask = aeGetFileEvents(conn->c.el, conn->c.fd);
switch (want) {
case WANT_READ:
if (mask & AE_WRITABLE) aeDeleteFileEvent(server.el, conn->c.fd, AE_WRITABLE);
if (!(mask & AE_READABLE)) aeCreateFileEvent(server.el, conn->c.fd, AE_READABLE,
if (mask & AE_WRITABLE) aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_WRITABLE);
if (!(mask & AE_READABLE)) aeCreateFileEvent(conn->c.el, conn->c.fd, AE_READABLE,
tlsEventHandler, conn);
break;
case WANT_WRITE:
if (mask & AE_READABLE) aeDeleteFileEvent(server.el, conn->c.fd, AE_READABLE);
if (!(mask & AE_WRITABLE)) aeCreateFileEvent(server.el, conn->c.fd, AE_WRITABLE,
if (mask & AE_READABLE) aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_READABLE);
if (!(mask & AE_WRITABLE)) aeCreateFileEvent(conn->c.el, conn->c.fd, AE_WRITABLE,
tlsEventHandler, conn);
break;
default:
@ -595,19 +591,42 @@ static void registerSSLEvent(tls_connection *conn, WantIOType want) {
}
static void updateSSLEvent(tls_connection *conn) {
int mask = aeGetFileEvents(server.el, conn->c.fd);
serverAssert(conn->c.el);
int mask = aeGetFileEvents(conn->c.el, conn->c.fd);
int need_read = conn->c.read_handler || (conn->flags & TLS_CONN_FLAG_WRITE_WANT_READ);
int need_write = conn->c.write_handler || (conn->flags & TLS_CONN_FLAG_READ_WANT_WRITE);
if (need_read && !(mask & AE_READABLE))
aeCreateFileEvent(server.el, conn->c.fd, AE_READABLE, tlsEventHandler, conn);
aeCreateFileEvent(conn->c.el, conn->c.fd, AE_READABLE, tlsEventHandler, conn);
if (!need_read && (mask & AE_READABLE))
aeDeleteFileEvent(server.el, conn->c.fd, AE_READABLE);
aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_READABLE);
if (need_write && !(mask & AE_WRITABLE))
aeCreateFileEvent(server.el, conn->c.fd, AE_WRITABLE, tlsEventHandler, conn);
aeCreateFileEvent(conn->c.el, conn->c.fd, AE_WRITABLE, tlsEventHandler, conn);
if (!need_write && (mask & AE_WRITABLE))
aeDeleteFileEvent(server.el, conn->c.fd, AE_WRITABLE);
aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_WRITABLE);
}
/* Add a connection to the list of connections with pending data that has
* already been read from the socket but has not yet been served to the reader. */
static void tlsPendingAdd(tls_connection *conn) {
if (!conn->c.el->privdata[1])
conn->c.el->privdata[1] = listCreate();
list *pending_list = conn->c.el->privdata[1];
if (!conn->pending_list_node) {
listAddNodeTail(pending_list, conn);
conn->pending_list_node = listLast(pending_list);
}
}
/* Removes a connection from the list of connections with pending data. */
static void tlsPendingRemove(tls_connection *conn) {
if (conn->pending_list_node) {
list *pending_list = conn->c.el->privdata[1];
listDelNode(pending_list, conn->pending_list_node);
conn->pending_list_node = NULL;
}
}
static void tlsHandleEvent(tls_connection *conn, int mask) {
@ -718,13 +737,9 @@ static void tlsHandleEvent(tls_connection *conn, int mask) {
* to a list of pending connection that should be handled anyway. */
if ((mask & AE_READABLE)) {
if (SSL_pending(conn->ssl) > 0) {
if (!conn->pending_list_node) {
listAddNodeTail(pending_list, conn);
conn->pending_list_node = listLast(pending_list);
}
tlsPendingAdd(conn);
} else if (conn->pending_list_node) {
listDelNode(pending_list, conn->pending_list_node);
conn->pending_list_node = NULL;
tlsPendingRemove(conn);
}
}
@ -734,7 +749,8 @@ static void tlsHandleEvent(tls_connection *conn, int mask) {
break;
}
updateSSLEvent(conn);
/* The event loop may have been unbound during the event processing above. */
if (conn->c.el) updateSSLEvent(conn);
}
static void tlsEventHandler(struct aeEventLoop *el, int fd, void *clientData, int mask) {
@ -748,7 +764,6 @@ static void tlsAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask)
int cport, cfd;
int max = server.max_new_tls_conns_per_cycle;
char cip[NET_IP_STR_LEN];
UNUSED(el);
UNUSED(mask);
UNUSED(privdata);
@ -761,7 +776,7 @@ static void tlsAcceptHandler(aeEventLoop *el, int fd, void *privdata, int mask)
return;
}
serverLog(LL_VERBOSE,"Accepted %s:%d", cip, cport);
acceptCommonHandler(connCreateAcceptedTLS(cfd, &server.tls_auth_clients),0,cip);
acceptCommonHandler(connCreateAcceptedTLS(el,cfd,&server.tls_auth_clients), 0, cip);
}
}
@ -806,6 +821,7 @@ static void connTLSClose(connection *conn_) {
}
if (conn->pending_list_node) {
list *pending_list = conn->c.el->privdata[1];
listDelNode(pending_list, conn->pending_list_node);
conn->pending_list_node = NULL;
}
@ -863,6 +879,33 @@ static int connTLSConnect(connection *conn_, const char *addr, int port, const c
return C_OK;
}
static void connTLSUnbindEventLoop(connection *conn_) {
tls_connection *conn = (tls_connection *) conn_;
/* We need to remove all events from the old event loop. The subsequent
* updateSSLEvent() will add the appropriate events to the new event loop. */
if (conn->c.el) {
int mask = aeGetFileEvents(conn->c.el, conn->c.fd);
if (mask & AE_READABLE) aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_READABLE);
if (mask & AE_WRITABLE) aeDeleteFileEvent(conn->c.el, conn->c.fd, AE_WRITABLE);
/* Check if there are pending events and handle accordingly. */
int has_pending = conn->pending_list_node != NULL;
if (has_pending) tlsPendingRemove(conn);
}
}
static int connTLSRebindEventLoop(connection *conn_, aeEventLoop *el) {
tls_connection *conn = (tls_connection *) conn_;
serverAssert(!conn->c.el && !conn->c.read_handler &&
!conn->c.write_handler && !conn->pending_list_node);
conn->c.el = el;
if (el && SSL_pending(conn->ssl)) tlsPendingAdd(conn);
/* Add the appropriate events to the new event loop. */
updateSSLEvent((tls_connection *) conn);
return C_OK;
}
static int connTLSWrite(connection *conn_, const void *data, size_t data_len) {
tls_connection *conn = (tls_connection *) conn_;
int ret;
@ -1044,16 +1087,19 @@ static const char *connTLSGetType(connection *conn_) {
return CONN_TYPE_TLS;
}
static int tlsHasPendingData(void) {
static int tlsHasPendingData(struct aeEventLoop *el) {
list *pending_list = el->privdata[1];
if (!pending_list)
return 0;
return listLength(pending_list) > 0;
}
static int tlsProcessPendingData(void) {
static int tlsProcessPendingData(struct aeEventLoop *el) {
listIter li;
listNode *ln;
list *pending_list = el->privdata[1];
if (!pending_list) return 0;
int processed = listLength(pending_list);
listRewind(pending_list,&li);
while((ln = listNext(&li))) {
@ -1114,6 +1160,10 @@ static ConnectionType CT_TLS = {
.blocking_connect = connTLSBlockingConnect,
.accept = connTLSAccept,
/* event loop */
.unbind_event_loop = connTLSUnbindEventLoop,
.rebind_event_loop = connTLSRebindEventLoop,
/* IO */
.read = connTLSRead,
.write = connTLSWrite,

View File

@ -253,6 +253,7 @@ void trackingRememberKeys(client *tracking, client *executing) {
* - Following a flush command, to send a single RESP NULL to indicate
* that all keys are now invalid. */
void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
int paused = 0;
uint64_t old_flags = c->flags;
c->flags |= CLIENT_PUSHING;
@ -275,6 +276,11 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
c = redir;
using_redirection = 1;
/* Start to touch another client data. */
if (c->running_tid != IOTHREAD_MAIN_THREAD_ID) {
pauseIOThread(c->running_tid);
paused = 1;
}
old_flags = c->flags;
c->flags |= CLIENT_PUSHING;
}
@ -296,7 +302,7 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
* it since RESP2 does not support push messages in the same
* connection. */
if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
return;
goto done;
}
/* Send the "value" part, which is the array of keys. */
@ -308,6 +314,17 @@ void sendTrackingMessage(client *c, char *keyname, size_t keylen, int proto) {
}
updateClientMemUsageAndBucket(c);
if (!(old_flags & CLIENT_PUSHING)) c->flags &= ~CLIENT_PUSHING;
done:
if (paused) {
if (clientHasPendingReplies(c)) {
serverAssert(!(c->flags & CLIENT_PENDING_WRITE));
/* Actually we install write handler of client which is in IO thread
* event loop, it is safe since the io thread is paused */
connSetWriteHandler(c->conn, sendReplyToClient);
}
resumeIOThread(c->running_tid);
}
}
/* This function is called when a key is modified in Redis and in the case

View File

@ -74,18 +74,19 @@ static int connUnixListen(connListener *listener) {
return C_OK;
}
static connection *connCreateUnix(void) {
static connection *connCreateUnix(struct aeEventLoop *el) {
connection *conn = zcalloc(sizeof(connection));
conn->type = &CT_Unix;
conn->fd = -1;
conn->iovcnt = IOV_MAX;
conn->el = el;
return conn;
}
static connection *connCreateAcceptedUnix(int fd, void *priv) {
static connection *connCreateAcceptedUnix(struct aeEventLoop *el, int fd, void *priv) {
UNUSED(priv);
connection *conn = connCreateUnix();
connection *conn = connCreateUnix(el);
conn->fd = fd;
conn->state = CONN_STATE_ACCEPTING;
return conn;
@ -107,7 +108,7 @@ static void connUnixAcceptHandler(aeEventLoop *el, int fd, void *privdata, int m
return;
}
serverLog(LL_VERBOSE,"Accepted connection to %s", server.unixsocket);
acceptCommonHandler(connCreateAcceptedUnix(cfd, NULL),CLIENT_UNIX_SOCKET,NULL);
acceptCommonHandler(connCreateAcceptedUnix(el, cfd, NULL),CLIENT_UNIX_SOCKET,NULL);
}
}
@ -123,6 +124,10 @@ static int connUnixAccept(connection *conn, ConnectionCallbackFunc accept_handle
return connectionTypeTcp()->accept(conn, accept_handler);
}
static int connUnixRebindEventLoop(connection *conn, aeEventLoop *el) {
return connectionTypeTcp()->rebind_event_loop(conn, el);
}
static int connUnixWrite(connection *conn, const void *data, size_t data_len) {
return connectionTypeTcp()->write(conn, data, data_len);
}
@ -186,6 +191,10 @@ static ConnectionType CT_Unix = {
.blocking_connect = NULL,
.accept = connUnixAccept,
/* event loop */
.unbind_event_loop = NULL,
.rebind_event_loop = connUnixRebindEventLoop,
/* IO */
.write = connUnixWrite,
.writev = connUnixWritev,

View File

@ -54,6 +54,13 @@
#define UNUSED(x) ((void)(x))
/* Selectively define static_assert. Attempt to avoid include server.h in this file. */
#ifndef static_assert
#define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 1:-1]
#endif
static_assert(UINTPTR_MAX == 0xffffffffffffffff || UINTPTR_MAX == 0xffffffff, "Unsupported pointer size");
/* Glob-style pattern matching. */
static int stringmatchlen_impl(const char *pattern, int patternLen,
const char *string, int stringLen, int nocase, int *skipLongerMatches, int nesting)
@ -102,24 +109,24 @@ static int stringmatchlen_impl(const char *pattern, int patternLen,
pattern++;
patternLen--;
not = pattern[0] == '^';
not = patternLen && pattern[0] == '^';
if (not) {
pattern++;
patternLen--;
}
match = 0;
while(1) {
if (pattern[0] == '\\' && patternLen >= 2) {
if (patternLen >= 2 && pattern[0] == '\\') {
pattern++;
patternLen--;
if (pattern[0] == string[0])
match = 1;
} else if (pattern[0] == ']') {
break;
} else if (patternLen == 0) {
pattern--;
patternLen++;
break;
} else if (pattern[0] == ']') {
break;
} else if (patternLen >= 3 && pattern[1] == '-') {
int start = pattern[0];
int end = pattern[2];
@ -179,7 +186,7 @@ static int stringmatchlen_impl(const char *pattern, int patternLen,
pattern++;
patternLen--;
if (stringLen == 0) {
while(*pattern == '*') {
while(patternLen && *pattern == '*') {
pattern++;
patternLen--;
}
@ -191,6 +198,43 @@ static int stringmatchlen_impl(const char *pattern, int patternLen,
return 0;
}
/*
* glob-style pattern matching to check if a given pattern fully includes
* the prefix of a string. For the match to succeed, the pattern must end with
* an unescaped '*' character.
*
* Returns: 1 if the `pattern` fully matches the `prefixStr`. Returns 0 otherwise.
*/
int prefixmatch(const char *pattern, int patternLen,
const char *prefixStr, int prefixStrLen, int nocase) {
int skipLongerMatches = 0;
/* Step 1: Verify if the pattern matches the prefix string completely. */
if (!stringmatchlen_impl(pattern, patternLen, prefixStr, prefixStrLen, nocase, &skipLongerMatches, 0))
return 0;
/* Step 2: Verify that the pattern ends with an unescaped '*', indicating
* it can match any suffix of the string beyond the prefix. This check
* remains outside stringmatchlen_impl() to keep its complexity manageable.
*/
if (patternLen == 0 || pattern[patternLen - 1] != '*' )
return 0;
/* Count backward the number of consecutive backslashes preceding the '*'
* to determine if the '*' is escaped. */
int backslashCount = 0;
for (int i = patternLen - 2; i >= 0; i--) {
if (pattern[i] == '\\')
++backslashCount;
else
break; /* Stop counting when a non-backslash character is found. */
}
/* Return 1 if the '*' is not escaped (i.e., even count), 0 otherwise. */
return (backslashCount % 2 == 0);
}
/* Glob-style pattern matching to a string. */
int stringmatchlen(const char *pattern, int patternLen,
const char *string, int stringLen, int nocase) {
int skipLongerMatches = 0;

View File

@ -36,6 +36,8 @@ typedef enum {
LD_STR_HEX /* %La */
} ld2string_mode;
int prefixmatch(const char *pattern, int patternLen, const char *prefixStr,
int prefixStrLen, int nocase);
int stringmatchlen(const char *p, int plen, const char *s, int slen, int nocase);
int stringmatch(const char *p, const char *s, int nocase);
int stringmatchlen_fuzz_test(void);
@ -79,6 +81,19 @@ int snprintf_async_signal_safe(char *to, size_t n, const char *fmt, ...);
size_t redis_strlcpy(char *dst, const char *src, size_t dsize);
size_t redis_strlcat(char *dst, const char *src, size_t dsize);
/* to keep it opt without conditions Works only for: 0 < x < 2^63 */
static inline int log2ceil(size_t x) {
#if UINTPTR_MAX == 0xffffffffffffffff
return 63 - __builtin_clzll(x);
#else
return 31 - __builtin_clz(x);
#endif
}
#ifndef static_assert
#define static_assert(expr, lit) extern char __static_assert_failure[(expr) ? 1:-1]
#endif
#ifdef REDIS_TEST
int utilTest(int argc, char **argv, int flags);
#endif

View File

@ -2367,7 +2367,7 @@ int ziplistTest(int argc, char **argv, int flags) {
for (i = 0; i < iteration; i++) {
zl = ziplistNew();
ref = listCreate();
listSetFreeMethod(ref,(void (*)(void*))sdsfree);
listSetFreeMethod(ref, sdsfreegeneric);
len = rand() % 256;
/* Create lists */

View File

@ -46,7 +46,7 @@ test "Resharding all the master #0 slots away from it" {
}
test "Master #0 who lost all slots should turn into a replica without replicas" {
wait_for_condition 1000 50 {
wait_for_condition 2000 50 {
[RI 0 role] == "slave" && [RI 0 connected_slaves] == 0
} else {
puts [R 0 info replication]

View File

@ -1,18 +1,37 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
source tests/support/redis.tcl
set ::tlsdir "tests/tls"
proc gen_write_load {host port seconds tls} {
# Continuously sends SET commands to the server. If key is omitted, a random key
# is used for every SET command. The value is always random.
proc gen_write_load {host port seconds tls {key ""}} {
set start_time [clock seconds]
set r [redis $host $port 1 $tls]
$r client setname LOAD_HANDLER
$r select 9
while 1 {
if {$key == ""} {
$r set [expr rand()] [expr rand()]
} else {
$r set $key [expr rand()]
}
if {[clock seconds]-$start_time > $seconds} {
exit 0
}
}
}
gen_write_load [lindex $argv 0] [lindex $argv 1] [lindex $argv 2] [lindex $argv 3]
gen_write_load [lindex $argv 0] [lindex $argv 1] [lindex $argv 2] [lindex $argv 3] [lindex $argv 4]

View File

@ -1,6 +1,20 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
# This test group aims to test that all replicas share one global replication buffer,
# two replicas don't make replication buffer size double, and when there is no replica,
# replica buffer will shrink.
foreach rdbchannel {"yes" "no"} {
start_server {tags {"repl external:skip"}} {
start_server {} {
start_server {} {
@ -9,6 +23,10 @@ start_server {} {
set replica2 [srv -2 client]
set replica3 [srv -1 client]
$replica1 config set repl-rdb-channel $rdbchannel
$replica2 config set repl-rdb-channel $rdbchannel
$replica3 config set repl-rdb-channel $rdbchannel
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
@ -18,6 +36,7 @@ start_server {} {
$master config set repl-diskless-sync-delay 5
$master config set repl-diskless-sync-max-replicas 1
$master config set client-output-buffer-limit "replica 0 0 0"
$master config set repl-rdb-channel $rdbchannel
# Make sure replica3 is synchronized with master
$replica3 replicaof $master_host $master_port
@ -39,7 +58,7 @@ start_server {} {
fail "fail to sync with replicas"
}
test {All replicas share one global replication buffer} {
test "All replicas share one global replication buffer rdbchannel=$rdbchannel" {
set before_used [s used_memory]
populate 1024 "" 1024 ; # Write extra 1M data
# New data uses 1M memory, but all replicas use only one
@ -47,7 +66,13 @@ start_server {} {
# more than double of replication buffer.
set repl_buf_mem [s mem_total_replication_buffers]
set extra_mem [expr {[s used_memory]-$before_used-1024*1024}]
if {$rdbchannel == "yes"} {
# master's replication buffers should not grow
assert {$extra_mem < 1024*1024}
assert {$repl_buf_mem < 1024*1024}
} else {
assert {$extra_mem < 2*$repl_buf_mem}
}
# Kill replica1, replication_buffer will not become smaller
catch {$replica1 shutdown nosave}
@ -59,7 +84,7 @@ start_server {} {
assert_equal $repl_buf_mem [s mem_total_replication_buffers]
}
test {Replication buffer will become smaller when no replica uses} {
test "Replication buffer will become smaller when no replica uses rdbchannel=$rdbchannel" {
# Make sure replica3 catch up with the master
wait_for_ofs_sync $master $replica3
@ -71,8 +96,14 @@ start_server {} {
} else {
fail "replica2 doesn't disconnect with master"
}
if {$rdbchannel == "yes"} {
# master's replication buffers should not grow
assert {1024*512 > [s mem_total_replication_buffers]}
} else {
assert {[expr $repl_buf_mem - 1024*1024] > [s mem_total_replication_buffers]}
}
}
}
}
}
}
@ -84,6 +115,7 @@ start_server {} {
# partial re-synchronization. Of course, replication backlog memory also can
# become smaller when master disconnects with slow replicas since output buffer
# limit is reached.
foreach rdbchannel {"yes" "no"} {
start_server {tags {"repl external:skip"}} {
start_server {} {
start_server {} {
@ -98,6 +130,7 @@ start_server {} {
$master config set save ""
$master config set repl-backlog-size 16384
$master config set repl-rdb-channel $rdbchannel
$master config set client-output-buffer-limit "replica 0 0 0"
# Executing 'debug digest' on master which has many keys costs much time
@ -105,12 +138,16 @@ start_server {} {
# with master.
$master config set repl-timeout 1000
$replica1 config set repl-timeout 1000
$replica1 config set repl-rdb-channel $rdbchannel
$replica1 config set client-output-buffer-limit "replica 1024 0 0"
$replica2 config set repl-timeout 1000
$replica2 config set client-output-buffer-limit "replica 1024 0 0"
$replica2 config set repl-rdb-channel $rdbchannel
$replica1 replicaof $master_host $master_port
wait_for_sync $replica1
test {Replication backlog size can outgrow the backlog limit config} {
test "Replication backlog size can outgrow the backlog limit config rdbchannel=$rdbchannel" {
# Generating RDB will take 1000 seconds
$master config set rdb-key-save-delay 1000000
populate 1000 master 10000
@ -124,7 +161,7 @@ start_server {} {
}
# Replication actual backlog grow more than backlog setting since
# the slow replica2 kept replication buffer.
populate 10000 master 10000
populate 20000 master 10000
assert {[s repl_backlog_histlen] > [expr 10000*10000]}
}
@ -135,7 +172,7 @@ start_server {} {
fail "Replica offset didn't catch up with the master after too long time"
}
test {Replica could use replication buffer (beyond backlog config) for partial resynchronization} {
test "Replica could use replication buffer (beyond backlog config) for partial resynchronization rdbchannel=$rdbchannel" {
# replica1 disconnects with master
$replica1 replicaof [srv -1 host] [srv -1 port]
# Write a mass of data that exceeds repl-backlog-size
@ -155,7 +192,7 @@ start_server {} {
assert_equal [$master debug digest] [$replica1 debug digest]
}
test {Replication backlog memory will become smaller if disconnecting with replica} {
test "Replication backlog memory will become smaller if disconnecting with replica rdbchannel=$rdbchannel" {
assert {[s repl_backlog_histlen] > [expr 2*10000*10000]}
assert_equal [s connected_slaves] {2}
@ -165,8 +202,11 @@ start_server {} {
r set key [string repeat A [expr 64*1024]]
# master will close replica2's connection since replica2's output
# buffer limit is reached, so there only is replica1.
# In case of rdbchannel=yes, main channel will be disconnected only.
wait_for_condition 100 100 {
[s connected_slaves] eq {1}
[s connected_slaves] eq {1} ||
([s connected_slaves] eq {2} &&
[string match {*slave*state=wait_bgsave*} [$master info]])
} else {
fail "master didn't disconnect with replica2"
}
@ -185,15 +225,19 @@ start_server {} {
}
}
}
}
test {Partial resynchronization is successful even client-output-buffer-limit is less than repl-backlog-size} {
foreach rdbchannel {"yes" "no"} {
test "Partial resynchronization is successful even client-output-buffer-limit is less than repl-backlog-size rdbchannel=$rdbchannel" {
start_server {tags {"repl external:skip"}} {
start_server {} {
r config set save ""
r config set repl-backlog-size 100mb
r config set client-output-buffer-limit "replica 512k 0 0"
r config set repl-rdb-channel $rdbchannel
set replica [srv -1 client]
$replica config set repl-rdb-channel $rdbchannel
$replica replicaof [srv 0 host] [srv 0 port]
wait_for_sync $replica
@ -231,7 +275,7 @@ test {Partial resynchronization is successful even client-output-buffer-limit is
}
# This test was added to make sure big keys added to the backlog do not trigger psync loop.
test {Replica client-output-buffer size is limited to backlog_limit/16 when no replication data is pending} {
test "Replica client-output-buffer size is limited to backlog_limit/16 when no replication data is pending rdbchannel=$rdbchannel" {
proc client_field {r type f} {
set client [$r client list type $type]
if {![regexp $f=(\[a-zA-Z0-9-\]+) $client - res]} {
@ -252,6 +296,8 @@ test {Replica client-output-buffer size is limited to backlog_limit/16 when no r
$master config set repl-backlog-size 16384
$master config set client-output-buffer-limit "replica 32768 32768 60"
$master config set repl-rdb-channel $rdbchannel
$replica config set repl-rdb-channel $rdbchannel
# Key has has to be larger than replica client-output-buffer limit.
set keysize [expr 256*1024]
@ -304,4 +350,5 @@ test {Replica client-output-buffer size is limited to backlog_limit/16 when no r
}
}
}
}

View File

@ -1,3 +1,16 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
# Creates a master-slave pair and breaks the link continuously to force
# partial resyncs attempts, all this while flooding the master with
# write queries.
@ -8,7 +21,7 @@
# If reconnect is > 0, the test actually try to break the connection and
# reconnect with the master, otherwise just the initial synchronization is
# checked for consistency.
proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reconnect} {
proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reconnect rdbchannel} {
start_server {tags {"repl"} overrides {save {}}} {
start_server {overrides {save {}}} {
@ -21,7 +34,9 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reco
$master config set repl-backlog-ttl $backlog_ttl
$master config set repl-diskless-sync $mdl
$master config set repl-diskless-sync-delay 1
$master config set repl-rdb-channel $rdbchannel
$slave config set repl-diskless-load $sdl
$slave config set repl-rdb-channel $rdbchannel
set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000]
set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000]
@ -46,7 +61,7 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reco
}
}
test "Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect)" {
test "Test replication partial resync: $descr (diskless: $mdl, $sdl, reconnect: $reconnect, rdbchannel: $rdbchannel)" {
# Now while the clients are writing data, break the maste-slave
# link multiple times.
if ($reconnect) {
@ -120,24 +135,31 @@ proc test_psync {descr duration backlog_size backlog_ttl delay cond mdl sdl reco
tags {"external:skip"} {
foreach mdl {no yes} {
foreach sdl {disabled swapdb} {
foreach rdbchannel {yes no} {
if {$rdbchannel == "yes" && $mdl == "no"} {
# rdbchannel replication requires repl-diskless-sync enabled
continue
}
test_psync {no reconnection, just sync} 6 1000000 3600 0 {
} $mdl $sdl 0
} $mdl $sdl 0 $rdbchannel
test_psync {ok psync} 6 100000000 3600 0 {
assert {[s -1 sync_partial_ok] > 0}
} $mdl $sdl 1
} $mdl $sdl 1 $rdbchannel
test_psync {no backlog} 6 100 3600 0.5 {
assert {[s -1 sync_partial_err] > 0}
} $mdl $sdl 1
} $mdl $sdl 1 $rdbchannel
test_psync {ok after delay} 3 100000000 3600 3 {
assert {[s -1 sync_partial_ok] > 0}
} $mdl $sdl 1
} $mdl $sdl 1 $rdbchannel
test_psync {backlog expired} 3 100000000 1 3 {
assert {[s -1 sync_partial_err] > 0}
} $mdl $sdl 1
} $mdl $sdl 1 $rdbchannel
}
}
}
}

View File

@ -0,0 +1,795 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
# Returns either main or rdbchannel client id
# Assumes there is one replica with two channels
proc get_replica_client_id {master rdbchannel} {
set input [$master client list type replica]
foreach line [split $input "\n"] {
if {[regexp {id=(\d+).*flags=(\S+)} $line match id flags]} {
if {$rdbchannel == "yes"} {
# rdbchannel will have C flag
if {[string match *C* $flags]} {
return $id
}
} else {
return $id
}
}
}
error "Replica not found"
}
start_server {tags {"repl external:skip"}} {
set replica1 [srv 0 client]
start_server {} {
set replica2 [srv 0 client]
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
$master config set repl-diskless-sync yes
$master config set repl-rdb-channel yes
populate 1000 master 10
test "Test replication with multiple replicas (rdbchannel enabled on both)" {
$replica1 config set repl-rdb-channel yes
$replica1 replicaof $master_host $master_port
$replica2 config set repl-rdb-channel yes
$replica2 replicaof $master_host $master_port
wait_replica_online $master 0
wait_replica_online $master 1
$master set x 1
# Wait until replicas catch master
wait_for_ofs_sync $master $replica1
wait_for_ofs_sync $master $replica2
# Verify db's are identical
assert_morethan [$master dbsize] 0
assert_equal [$master get x] 1
assert_equal [$master debug digest] [$replica1 debug digest]
assert_equal [$master debug digest] [$replica2 debug digest]
}
test "Test replication with multiple replicas (rdbchannel enabled on one of them)" {
# Allow both replicas to ask for sync
$master config set repl-diskless-sync-delay 5
$replica1 replicaof no one
$replica2 replicaof no one
$replica1 config set repl-rdb-channel yes
$replica2 config set repl-rdb-channel no
set prev_forks [s 0 total_forks]
$master set x 2
# There will be two forks subsequently, one for rdbchannel
# replica another for the replica without rdbchannel config.
$replica1 replicaof $master_host $master_port
$replica2 replicaof $master_host $master_port
set res [wait_for_log_messages 0 {"*Starting BGSAVE* replicas sockets (rdb-channel)*"} 0 2000 10]
set loglines [lindex $res 1]
wait_for_log_messages 0 {"*Starting BGSAVE* replicas sockets*"} $loglines 2000 10
wait_replica_online $master 0 100 100
wait_replica_online $master 1 100 100
# Verify two new forks.
assert_equal [s 0 total_forks] [expr $prev_forks + 2]
wait_for_ofs_sync $master $replica1
wait_for_ofs_sync $master $replica2
# Verify db's are identical
assert_equal [$replica1 get x] 2
assert_equal [$replica2 get x] 2
assert_equal [$master debug digest] [$replica1 debug digest]
assert_equal [$master debug digest] [$replica2 debug digest]
}
test "Test rdbchannel is not used if repl-diskless-sync config is disabled on master" {
$replica1 replicaof no one
$replica2 replicaof no one
$master config set repl-diskless-sync-delay 0
$master config set repl-diskless-sync no
$master set x 3
$replica1 replicaof $master_host $master_port
# Verify log message does not mention rdbchannel
wait_for_log_messages 0 {"*Starting BGSAVE for SYNC with target: disk*"} 0 2000 1
wait_replica_online $master 0
wait_for_ofs_sync $master $replica1
# Verify db's are identical
assert_equal [$replica1 get x] 3
assert_equal [$master debug digest] [$replica1 debug digest]
}
}
}
}
start_server {tags {"repl external:skip"}} {
set replica [srv 0 client]
set replica_pid [srv 0 pid]
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
$master config set repl-rdb-channel yes
$replica config set repl-rdb-channel yes
# Reuse this test to verify large key delivery
$master config set rdbcompression no
$master config set rdb-key-save-delay 3000
populate 1000 prefix1 10
populate 5 prefix2 3000000
populate 5 prefix3 2000000
populate 5 prefix4 1000000
# On master info output, we should see state transition in this order:
# 1. wait_bgsave: Replica receives psync error (+RDBCHANNELSYNC)
# 2. send_bulk_and_stream: Replica opens rdbchannel and delivery started
# 3. online: Sync is completed
test "Test replica state should start with wait_bgsave" {
$replica config set key-load-delay 100000
# Pause replica before opening rdb channel conn
$replica debug repl-pause before-rdb-channel
$replica replicaof $master_host $master_port
wait_for_condition 50 200 {
[s 0 connected_slaves] == 1 &&
[string match "*wait_bgsave*" [s 0 slave0]]
} else {
fail "replica failed"
}
}
test "Test replica state advances to send_bulk_and_stream when rdbchannel connects" {
$master set x 1
resume_process $replica_pid
wait_for_condition 50 200 {
[s 0 connected_slaves] == 1 &&
[s 0 rdb_bgsave_in_progress] == 1 &&
[string match "*send_bulk_and_stream*" [s 0 slave0]]
} else {
fail "replica failed"
}
}
test "Test replica rdbchannel client has SC flag on client list output" {
set input [$master client list type replica]
# There will two replicas, second one should be rdbchannel
set trimmed_input [string trimright $input]
set lines [split $trimmed_input "\n"]
if {[llength $lines] < 2} {
error "There is no second line in the input: $input"
}
set second_line [lindex $lines 1]
# Check if 'flags=SC' exists in the second line
if {![regexp {flags=SC} $second_line]} {
error "Flags are not 'SC' in the second line: $second_line"
}
}
test "Test replica state advances to online when fullsync is completed" {
# Speed up loading
$replica config set key-load-delay 0
wait_replica_online $master 0 100 1000
wait_for_ofs_sync $master $replica
wait_for_condition 50 200 {
[s 0 rdb_bgsave_in_progress] == 0 &&
[s 0 connected_slaves] == 1 &&
[string match "*online*" [s 0 slave0]]
} else {
fail "replica failed"
}
wait_replica_online $master 0 100 1000
wait_for_ofs_sync $master $replica
# Verify db's are identical
assert_morethan [$master dbsize] 0
assert_equal [$master debug digest] [$replica debug digest]
}
}
}
start_server {tags {"repl external:skip"}} {
set replica [srv 0 client]
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
$master config set repl-rdb-channel yes
$replica config set repl-rdb-channel yes
test "Test master memory does not increase during replication" {
# Put some delay to rdb generation. If master doesn't forward
# incoming traffic to replica, master's replication buffer will grow
$master config set rdb-key-save-delay 200
$master config set repl-backlog-size 5mb
populate 10000 master 10000
# Start write traffic
set load_handle [start_write_load $master_host $master_port 100 "key1"]
set prev_used [s 0 used_memory]
$replica replicaof $master_host $master_port
set backlog_size [lindex [$master config get repl-backlog-size] 1]
# Verify used_memory stays low
set max_retry 1000
set prev_buf_size 0
while {$max_retry} {
assert_lessthan [expr [s 0 used_memory] - $prev_used] 20000000
assert_lessthan_equal [s 0 mem_total_replication_buffers] [expr {$backlog_size + 1000000}]
# Check replica state
if {[string match *slave0*state=online* [$master info]] &&
[s -1 master_link_status] == "up"} {
break
} else {
incr max_retry -1
after 10
}
}
if {$max_retry == 0} {
error "assertion:Replica not in sync after 10 seconds"
}
stop_write_load $load_handle
}
}
}
start_server {tags {"repl external:skip"}} {
set replica [srv 0 client]
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
$master config set repl-rdb-channel yes
$replica config set repl-rdb-channel yes
test "Test replication stream buffer becomes full on replica" {
# For replication stream accumulation, replica inherits slave output
# buffer limit as the size limit. In this test, we create traffic to
# fill the buffer fully. Once the limit is reached, accumulation
# will stop. This is not a failure scenario though. From that point,
# further accumulation may occur on master side. Replication should
# be completed successfully.
# Create some artificial delay for rdb delivery and load. We'll
# generate some traffic to fill the replication buffer.
$master config set rdb-key-save-delay 1000
$replica config set key-load-delay 1000
$replica config set client-output-buffer-limit "replica 64kb 64kb 0"
populate 2000 master 1
set prev_sync_full [s 0 sync_full]
$replica replicaof $master_host $master_port
# Wait for replica to establish psync using main channel
wait_for_condition 500 1000 {
[string match "*state=send_bulk_and_stream*" [s 0 slave0]]
} else {
fail "replica didn't start sync"
}
# Create some traffic on replication stream
populate 100 master 100000
# Wait for replica's buffer limit reached
wait_for_log_messages -1 {"*Replication buffer limit has been reached*"} 0 1000 10
# Speed up loading
$replica config set key-load-delay 0
# Wait until sync is successful
wait_for_condition 200 200 {
[status $master master_repl_offset] eq [status $replica master_repl_offset] &&
[status $master master_repl_offset] eq [status $replica slave_repl_offset]
} else {
fail "replica offsets didn't match in time"
}
# Verify sync was not interrupted.
assert_equal [s 0 sync_full] [expr $prev_sync_full + 1]
# Verify db's are identical
assert_morethan [$master dbsize] 0
assert_equal [$master debug digest] [$replica debug digest]
}
test "Test replication stream buffer config replica-full-sync-buffer-limit" {
# By default, replica inherits client-output-buffer-limit of replica
# to limit accumulated repl data during rdbchannel sync.
# replica-full-sync-buffer-limit should override it if it is set.
$replica replicaof no one
# Create some artificial delay for rdb delivery and load. We'll
# generate some traffic to fill the replication buffer.
$master config set rdb-key-save-delay 1000
$replica config set key-load-delay 1000
$replica config set client-output-buffer-limit "replica 1024 1024 0"
$replica config set replica-full-sync-buffer-limit 20mb
populate 2000 master 1
$replica replicaof $master_host $master_port
# Wait until replication starts
wait_for_condition 500 1000 {
[string match "*state=send_bulk_and_stream*" [s 0 slave0]]
} else {
fail "replica didn't start sync"
}
# Create some traffic on replication stream
populate 100 master 100000
# Make sure config is used, we accumulated more than
# client-output-buffer-limit
assert_morethan [s -1 replica_full_sync_buffer_size] 1024
}
}
}
start_server {tags {"repl external:skip"}} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
set master_pid [srv 0 pid]
set loglines [count_log_lines 0]
$master config set repl-diskless-sync yes
$master config set repl-rdb-channel yes
$master config set repl-backlog-size 1mb
$master config set client-output-buffer-limit "replica 100k 0 0"
$master config set loglevel debug
$master config set repl-diskless-sync-delay 3
start_server {} {
set replica [srv 0 client]
set replica_pid [srv 0 pid]
$replica config set repl-rdb-channel yes
$replica config set loglevel debug
$replica config set repl-timeout 10
$replica config set key-load-delay 10000
$replica config set loading-process-events-interval-bytes 1024
test "Test master disconnects replica when output buffer limit is reached" {
populate 20000 master 100 -1
$replica replicaof $master_host $master_port
wait_for_condition 50 200 {
[s 0 loading] == 1
} else {
fail "[s 0 loading] sdsdad"
}
# Generate some traffic for backlog ~2mb
populate 20 master 1000000 -1
set res [wait_for_log_messages -1 {"*Client * closed * for overcoming of output buffer limits.*"} $loglines 1000 10]
set loglines [lindex $res 1]
$replica config set key-load-delay 0
# Wait until replica loads RDB
wait_for_log_messages 0 {"*Done loading RDB*"} 0 1000 10
}
test "Test replication recovers after output buffer failures" {
# Verify system is operational
$master set x 1
# Wait until replica catches up
wait_replica_online $master 0 1000 100
wait_for_ofs_sync $master $replica
# Verify db's are identical
assert_morethan [$master dbsize] 0
assert_equal [$replica get x] 1
assert_equal [$master debug digest] [$replica debug digest]
}
}
}
start_server {tags {"repl external:skip"}} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
$master config set repl-diskless-sync yes
$master config set repl-rdb-channel yes
$master config set rdb-key-save-delay 300
$master config set client-output-buffer-limit "replica 0 0 0"
$master config set repl-diskless-sync-delay 5
$master config set loglevel debug
populate 10000 master 1
start_server {} {
set replica1 [srv 0 client]
$replica1 config set repl-rdb-channel yes
$replica1 config set loglevel debug
start_server {} {
set replica2 [srv 0 client]
$replica2 config set repl-rdb-channel yes
$replica2 config set loglevel debug
set load_handle [start_write_load $master_host $master_port 100 "key"]
test "Test master continues RDB delivery if not all replicas are dropped" {
$replica1 replicaof $master_host $master_port
$replica2 replicaof $master_host $master_port
wait_for_condition 50 200 {
[s -2 rdb_bgsave_in_progress] == 1
} else {
fail "Sync did not start"
}
# Wait for both replicas main conns to establish psync
wait_for_condition 500 100 {
[s -2 connected_slaves] == 2
} else {
fail "Replicas didn't establish psync:
sync_partial_ok: [s -2 sync_partial_ok]"
}
# kill one of the replicas
catch {$replica1 shutdown nosave}
# Wait until replica completes full sync
# Verify there is no other full sync attempt
wait_for_condition 50 1000 {
[s 0 master_link_status] == "up" &&
[s -2 sync_full] == 2 &&
[s -2 connected_slaves] == 1
} else {
fail "Sync session did not continue
master_link_status: [s 0 master_link_status]
sync_full:[s -2 sync_full]
connected_slaves: [s -2 connected_slaves]"
}
}
test "Test master aborts rdb delivery if all replicas are dropped" {
$replica2 replicaof no one
# Start replication
$replica2 replicaof $master_host $master_port
wait_for_condition 50 1000 {
[s -2 rdb_bgsave_in_progress] == 1
} else {
fail "Sync did not start"
}
set loglines [count_log_lines -2]
# kill replica
catch {$replica2 shutdown nosave}
# Verify master aborts rdb save
wait_for_condition 50 1000 {
[s -2 rdb_bgsave_in_progress] == 0 &&
[s -2 connected_slaves] == 0
} else {
fail "Master should abort the sync
rdb_bgsave_in_progress:[s -2 rdb_bgsave_in_progress]
connected_slaves: [s -2 connected_slaves]"
}
wait_for_log_messages -2 {"*Background transfer error*"} $loglines 1000 50
}
stop_write_load $load_handle
}
}
}
start_server {tags {"repl external:skip"}} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
$master config set repl-diskless-sync yes
$master config set repl-rdb-channel yes
$master config set loglevel debug
$master config set rdb-key-save-delay 1000
populate 3000 prefix1 1
populate 100 prefix2 100000
start_server {} {
set replica [srv 0 client]
set replica_pid [srv 0 pid]
$replica config set repl-rdb-channel yes
$replica config set loglevel debug
$replica config set repl-timeout 10
set load_handle [start_write_load $master_host $master_port 100 "key"]
test "Test replica recovers when rdb channel connection is killed" {
$replica replicaof $master_host $master_port
# Wait for sync session to start
wait_for_condition 500 200 {
[string match "*state=send_bulk_and_stream*" [s -1 slave0]] &&
[s -1 rdb_bgsave_in_progress] eq 1
} else {
fail "replica didn't start sync session in time"
}
set loglines [count_log_lines -1]
# Kill rdb channel client
set id [get_replica_client_id $master yes]
$master client kill id $id
wait_for_log_messages -1 {"*Background transfer error*"} $loglines 1000 10
# Verify master rejects main-ch-client-id after connection is killed
assert_error {*Unrecognized*} {$master replconf main-ch-client-id $id}
# Replica should retry
wait_for_condition 500 200 {
[string match "*state=send_bulk_and_stream*" [s -1 slave0]] &&
[s -1 rdb_bgsave_in_progress] eq 1
} else {
fail "replica didn't retry after connection close"
}
}
test "Test replica recovers when main channel connection is killed" {
set loglines [count_log_lines -1]
# Kill main channel client
set id [get_replica_client_id $master yes]
$master client kill id $id
wait_for_log_messages -1 {"*Background transfer error*"} $loglines 1000 20
# Replica should retry
wait_for_condition 500 2000 {
[string match "*state=send_bulk_and_stream*" [s -1 slave0]] &&
[s -1 rdb_bgsave_in_progress] eq 1
} else {
fail "replica didn't retry after connection close"
}
}
stop_write_load $load_handle
test "Test replica recovers connection failures" {
# Wait until replica catches up
wait_replica_online $master 0 1000 100
wait_for_ofs_sync $master $replica
# Verify db's are identical
assert_morethan [$master dbsize] 0
assert_equal [$master debug digest] [$replica debug digest]
}
}
}
start_server {tags {"repl external:skip"}} {
set replica [srv 0 client]
set replica_pid [srv 0 pid]
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
test "Test master connection drops while streaming repl buffer into the db" {
# Just after replica loads RDB, it will stream repl buffer into the
# db. During streaming, we kill the master connection. Replica
# will abort streaming and then try another psync with master.
$master config set rdb-key-save-delay 1000
$master config set repl-rdb-channel yes
$master config set repl-diskless-sync yes
$replica config set repl-rdb-channel yes
$replica config set loading-process-events-interval-bytes 1024
# Populate db and start write traffic
populate 2000 master 1000
set load_handle [start_write_load $master_host $master_port 100 "key1"]
# Replica will pause in the loop of repl buffer streaming
$replica debug repl-pause on-streaming-repl-buf
$replica replicaof $master_host $master_port
# Check if repl stream accumulation is started.
wait_for_condition 50 1000 {
[s -1 replica_full_sync_buffer_size] > 0
} else {
fail "repl stream accumulation not started"
}
# Wait until replica starts streaming repl buffer
wait_for_log_messages -1 {"*Starting to stream replication buffer*"} 0 2000 10
stop_write_load $load_handle
$master config set rdb-key-save-delay 0
# Kill master connection and resume the process
$replica deferred 1
$replica client kill type master
$replica debug repl-pause clear
resume_process $replica_pid
$replica read
$replica read
$replica deferred 0
wait_for_log_messages -1 {"*Master client was freed while streaming*"} 0 500 10
# Quick check for stats test coverage
assert_morethan_equal [s -1 replica_full_sync_buffer_peak] [s -1 replica_full_sync_buffer_size]
# Wait until replica recovers and verify db's are identical
wait_replica_online $master 0 1000 10
wait_for_ofs_sync $master $replica
assert_morethan [$master dbsize] 0
assert_equal [$master debug digest] [$replica debug digest]
}
}
}
start_server {tags {"repl external:skip"}} {
set replica [srv 0 client]
set replica_pid [srv 0 pid]
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
test "Test main channel connection drops while loading rdb (disk based)" {
# While loading rdb, we kill main channel connection.
# We expect replica to complete loading RDB and then try psync
# with the master.
$master config set repl-rdb-channel yes
$replica config set repl-rdb-channel yes
$replica config set repl-diskless-load disabled
$replica config set key-load-delay 10000
$replica config set loading-process-events-interval-bytes 1024
# Populate db and start write traffic
populate 10000 master 100
$replica replicaof $master_host $master_port
# Wait until replica starts loading
wait_for_condition 50 200 {
[s -1 loading] == 1
} else {
fail "replica did not start loading"
}
# Kill replica connections
$master client kill type replica
$master set x 1
# At this point, we expect replica to complete loading RDB. Then,
# it will try psync with master.
wait_for_log_messages -1 {"*Aborting rdb channel sync while loading the RDB*"} 0 2000 10
wait_for_log_messages -1 {"*After loading RDB, replica will try psync with master*"} 0 2000 10
# Speed up loading
$replica config set key-load-delay 0
# Wait until replica becomes online
wait_replica_online $master 0 100 100
# Verify there is another successful psync and no other full sync
wait_for_condition 50 200 {
[s 0 sync_full] == 1 &&
[s 0 sync_partial_ok] == 1
} else {
fail "psync was not successful [s 0 sync_full] [s 0 sync_partial_ok]"
}
# Verify db's are identical after recovery
wait_for_ofs_sync $master $replica
assert_morethan [$master dbsize] 0
assert_equal [$master debug digest] [$replica debug digest]
}
}
}
start_server {tags {"repl external:skip"}} {
set replica [srv 0 client]
set replica_pid [srv 0 pid]
start_server {} {
set master [srv 0 client]
set master_host [srv 0 host]
set master_port [srv 0 port]
test "Test main channel connection drops while loading rdb (diskless)" {
# While loading rdb, kill both main and rdbchannel connections.
# We expect replica to abort sync and later retry again.
$master config set repl-rdb-channel yes
$replica config set repl-rdb-channel yes
$replica config set repl-diskless-load swapdb
$replica config set key-load-delay 10000
$replica config set loading-process-events-interval-bytes 1024
# Populate db and start write traffic
populate 10000 master 100
$replica replicaof $master_host $master_port
# Wait until replica starts loading
wait_for_condition 50 200 {
[s -1 loading] == 1
} else {
fail "replica did not start loading"
}
# Kill replica connections
$master client kill type replica
$master set x 1
# At this point, we expect replica to abort loading RDB.
wait_for_log_messages -1 {"*Aborting rdb channel sync while loading the RDB*"} 0 2000 10
wait_for_log_messages -1 {"*Failed trying to load the MASTER synchronization DB from socket*"} 0 2000 10
# Speed up loading
$replica config set key-load-delay 0
stop_write_load $load_handle
# Wait until replica recovers and becomes online
wait_replica_online $master 0 100 100
# Verify replica attempts another full sync
wait_for_condition 50 200 {
[s 0 sync_full] == 2 &&
[s 0 sync_partial_ok] == 0
} else {
fail "sync was not successful [s 0 sync_full] [s 0 sync_partial_ok]"
}
# Verify db's are identical after recovery
wait_for_ofs_sync $master $replica
assert_morethan [$master dbsize] 0
assert_equal [$master debug digest] [$replica debug digest]
}
}
}

View File

@ -1,3 +1,16 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
proc log_file_matches {log pattern} {
set fp [open $log r]
set content [read $fp]
@ -303,7 +316,7 @@ start_server {tags {"repl external:skip"}} {
}
}
foreach mdl {no yes} {
foreach mdl {no yes} rdbchannel {no yes} {
foreach sdl {disabled swapdb} {
start_server {tags {"repl external:skip"} overrides {save {}}} {
set master [srv 0 client]
@ -319,7 +332,13 @@ foreach mdl {no yes} {
lappend slaves [srv 0 client]
start_server {overrides {save {}}} {
lappend slaves [srv 0 client]
test "Connect multiple replicas at the same time (issue #141), master diskless=$mdl, replica diskless=$sdl" {
test "Connect multiple replicas at the same time (issue #141), master diskless=$mdl, replica diskless=$sdl, rdbchannel=$rdbchannel" {
$master config set repl-rdb-channel $rdbchannel
[lindex $slaves 0] config set repl-rdb-channel $rdbchannel
[lindex $slaves 1] config set repl-rdb-channel $rdbchannel
[lindex $slaves 2] config set repl-rdb-channel $rdbchannel
# start load handles only inside the test, so that the test can be skipped
set load_handle0 [start_bg_complex_data $master_host $master_port 9 100000000]
set load_handle1 [start_bg_complex_data $master_host $master_port 11 100000000]
@ -438,7 +457,7 @@ start_server {tags {"repl external:skip"} overrides {save {}}} {
}
# Diskless load swapdb when NOT async_loading (different master replid)
foreach testType {Successful Aborted} {
foreach testType {Successful Aborted} rdbchannel {yes no} {
start_server {tags {"repl external:skip"}} {
set replica [srv 0 client]
set replica_host [srv 0 host]
@ -453,6 +472,7 @@ foreach testType {Successful Aborted} {
$master config set repl-diskless-sync yes
$master config set repl-diskless-sync-delay 0
$master config set save ""
$master config set repl-rdb-channel $rdbchannel
$replica config set repl-diskless-load swapdb
$replica config set save ""
@ -474,7 +494,7 @@ foreach testType {Successful Aborted} {
# Start the replication process
$replica replicaof $master_host $master_port
test {Diskless load swapdb (different replid): replica enter loading} {
test "Diskless load swapdb (different replid): replica enter loading rdbchannel=$rdbchannel" {
# Wait for the replica to start reading the rdb
wait_for_condition 100 100 {
[s -1 loading] eq 1
@ -498,7 +518,7 @@ foreach testType {Successful Aborted} {
fail "Replica didn't disconnect"
}
test {Diskless load swapdb (different replid): old database is exposed after replication fails} {
test "Diskless load swapdb (different replid): old database is exposed after replication fails rdbchannel=$rdbchannel" {
# Ensure we see old values from replica
assert_equal [$replica get mykey] "myvalue"
@ -590,8 +610,8 @@ foreach testType {Successful Aborted} {
if {$testType == "Aborted"} {
# Set master with a slow rdb generation, so that we can easily intercept loading
# 10ms per key, with 2000 keys is 20 seconds
$master config set rdb-key-save-delay 10000
# 20ms per key, with 2000 keys is 40 seconds
$master config set rdb-key-save-delay 20000
}
# Force the replica to try another full sync (this time it will have matching master replid)
@ -862,6 +882,7 @@ start_server {tags {"repl external:skip"} overrides {save ""}} {
# we also need the replica to process requests during transfer (which it does only once in 2mb)
$master debug populate 20000 test 10000
$master config set rdbcompression no
$master config set repl-rdb-channel no
# If running on Linux, we also measure utime/stime to detect possible I/O handling issues
set os [catch {exec uname}]
set measure_time [expr {$os == "Linux"} ? 1 : 0]
@ -1009,6 +1030,7 @@ test "diskless replication child being killed is collected" {
set master_pid [srv 0 pid]
$master config set repl-diskless-sync yes
$master config set repl-diskless-sync-delay 0
$master config set repl-rdb-channel no
# put enough data in the db that the rdb file will be bigger than the socket buffers
$master debug populate 20000 test 10000
$master config set rdbcompression no
@ -1269,7 +1291,8 @@ start_server {tags {"repl external:skip"}} {
r slaveof $master2_host $master2_port
wait_for_condition 50 100 {
([s -2 rdb_bgsave_in_progress] == 1) &&
([string match "*wait_bgsave*" [s -2 slave0]])
([string match "*wait_bgsave*" [s -2 slave0]] ||
[string match "*send_bulk_and_stream*" [s -2 slave0]])
} else {
fail "full sync didn't start"
}

View File

@ -156,6 +156,11 @@ test "Shutting down master waits for replica then fails" {
set rd2 [redis_deferring_client -1]
$rd1 shutdown
$rd2 shutdown
wait_for_condition 100 10 {
[llength [regexp -all -inline {cmd=shutdown} [$master client list]]] eq 2
} else {
fail "shutdown did not arrive"
}
set info_clients [$master info clients]
assert_match "*connected_clients:3*" $info_clients
assert_match "*blocked_clients:2*" $info_clients
@ -209,6 +214,11 @@ test "Shutting down master waits for replica then aborted" {
set rd2 [redis_deferring_client -1]
$rd1 shutdown
$rd2 shutdown
wait_for_condition 100 10 {
[llength [regexp -all -inline {cmd=shutdown} [$master client list]]] eq 2
} else {
fail "shutdown did not arrive"
}
set info_clients [$master info clients]
assert_match "*connected_clients:3*" $info_clients
assert_match "*blocked_clients:2*" $info_clients

View File

@ -51,6 +51,52 @@ int set_aclcheck_key(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
return REDISMODULE_OK;
}
/* A wrap for SET command with ACL check on the key. */
int set_aclcheck_prefixkey(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
if (argc < 4) {
return RedisModule_WrongArity(ctx);
}
int permissions;
const char *flags = RedisModule_StringPtrLen(argv[1], NULL);
if (!strcasecmp(flags, "W")) {
permissions = REDISMODULE_CMD_KEY_UPDATE;
} else if (!strcasecmp(flags, "R")) {
permissions = REDISMODULE_CMD_KEY_ACCESS;
} else if (!strcasecmp(flags, "*")) {
permissions = REDISMODULE_CMD_KEY_UPDATE | REDISMODULE_CMD_KEY_ACCESS;
} else if (!strcasecmp(flags, "~")) {
permissions = 0; /* Requires either read or write */
} else {
RedisModule_ReplyWithError(ctx, "INVALID FLAGS");
return REDISMODULE_OK;
}
/* Check that the key can be accessed */
RedisModuleString *user_name = RedisModule_GetCurrentUserName(ctx);
RedisModuleUser *user = RedisModule_GetModuleUserFromUserName(user_name);
int ret = RedisModule_ACLCheckKeyPrefixPermissions(user, argv[2], permissions);
if (ret != 0) {
RedisModule_ReplyWithError(ctx, "DENIED KEY");
RedisModule_FreeModuleUser(user);
RedisModule_FreeString(ctx, user_name);
return REDISMODULE_OK;
}
RedisModuleCallReply *rep = RedisModule_Call(ctx, "SET", "v", argv + 3, argc - 3);
if (!rep) {
RedisModule_ReplyWithError(ctx, "NULL reply returned");
} else {
RedisModule_ReplyWithCallReply(ctx, rep);
RedisModule_FreeCallReply(rep);
}
RedisModule_FreeModuleUser(user);
RedisModule_FreeString(ctx, user_name);
return REDISMODULE_OK;
}
/* A wrap for PUBLISH command with ACL check on the channel. */
int publish_aclcheck_channel(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
if (argc != 3) {
@ -247,6 +293,9 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
if (RedisModule_CreateCommand(ctx,"aclcheck.set.check.key", set_aclcheck_key,"write",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx,"aclcheck.set.check.prefixkey", set_aclcheck_prefixkey,"write",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx,"block.commands.outside.onload", commandBlockCheck,"write",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR;

View File

@ -21,19 +21,291 @@ void segfaultCrash(RedisModuleInfoCtx *ctx, int for_crash_report) {
*p = 'x';
}
int cmd_crash(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
UNUSED(ctx);
UNUSED(argv);
UNUSED(argc);
RedisModule_Assert(0);
return REDISMODULE_OK;
}
int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
REDISMODULE_NOT_USED(argv);
REDISMODULE_NOT_USED(argc);
if (RedisModule_Init(ctx,"infocrash",1,REDISMODULE_APIVER_1)
if (RedisModule_Init(ctx,"modulecrash",1,REDISMODULE_APIVER_1)
== REDISMODULE_ERR) return REDISMODULE_ERR;
RedisModule_Assert(argc == 1);
if (argc >= 1) {
if (!strcasecmp(RedisModule_StringPtrLen(argv[0], NULL), "segfault")) {
if (RedisModule_RegisterInfoFunc(ctx, segfaultCrash) == REDISMODULE_ERR) return REDISMODULE_ERR;
} else if(!strcasecmp(RedisModule_StringPtrLen(argv[0], NULL), "assert")) {
} else if (!strcasecmp(RedisModule_StringPtrLen(argv[0], NULL),"assert")) {
if (RedisModule_RegisterInfoFunc(ctx, assertCrash) == REDISMODULE_ERR) return REDISMODULE_ERR;
} else {
return REDISMODULE_ERR;
}
}
/* Create modulecrash.xadd command which is similar to xadd command.
* It will crash in the command handler to verify we print command tokens
* when hide-user-data-from-log config is enabled */
RedisModuleCommandInfo info = {
.version = REDISMODULE_COMMAND_INFO_VERSION,
.arity = -5,
.key_specs = (RedisModuleCommandKeySpec[]){
{
.notes = "UPDATE instead of INSERT because of the optional trimming feature",
.flags = REDISMODULE_CMD_KEY_RW | REDISMODULE_CMD_KEY_UPDATE,
.begin_search_type = REDISMODULE_KSPEC_BS_INDEX,
.bs.index.pos = 1,
.find_keys_type = REDISMODULE_KSPEC_FK_RANGE,
.fk.range = {0,1,0}
},
{0}
},
.args = (RedisModuleCommandArg[]){
{
.name = "key",
.type = REDISMODULE_ARG_TYPE_KEY,
.key_spec_index = 0
},
{
.name = "nomkstream",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "NOMKSTREAM",
.since = "6.2.0",
.flags = REDISMODULE_CMD_ARG_OPTIONAL
},
{
.name = "trim",
.type = REDISMODULE_ARG_TYPE_BLOCK,
.flags = REDISMODULE_CMD_ARG_OPTIONAL,
.subargs = (RedisModuleCommandArg[]){
{
.name = "strategy",
.type = REDISMODULE_ARG_TYPE_ONEOF,
.subargs = (RedisModuleCommandArg[]){
{
.name = "maxlen",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "MAXLEN",
},
{
.name = "minid",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "MINID",
.since = "6.2.0",
},
{0}
}
},
{
.name = "operator",
.type = REDISMODULE_ARG_TYPE_ONEOF,
.flags = REDISMODULE_CMD_ARG_OPTIONAL,
.subargs = (RedisModuleCommandArg[]){
{
.name = "equal",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "="
},
{
.name = "approximately",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "~"
},
{0}
}
},
{
.name = "threshold",
.type = REDISMODULE_ARG_TYPE_STRING,
.display_text = "threshold" /* Just for coverage, doesn't have a visible effect */
},
{
.name = "count",
.type = REDISMODULE_ARG_TYPE_INTEGER,
.token = "LIMIT",
.since = "6.2.0",
.flags = REDISMODULE_CMD_ARG_OPTIONAL
},
{0}
}
},
{
.name = "id-selector",
.type = REDISMODULE_ARG_TYPE_ONEOF,
.subargs = (RedisModuleCommandArg[]){
{
.name = "auto-id",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "*"
},
{
.name = "id",
.type = REDISMODULE_ARG_TYPE_STRING,
},
{0}
}
},
{
.name = "data",
.type = REDISMODULE_ARG_TYPE_BLOCK,
.flags = REDISMODULE_CMD_ARG_MULTIPLE,
.subargs = (RedisModuleCommandArg[]){
{
.name = "field",
.type = REDISMODULE_ARG_TYPE_STRING,
},
{
.name = "value",
.type = REDISMODULE_ARG_TYPE_STRING,
},
{0}
}
},
{0}
}
};
RedisModuleCommand *cmd;
if (RedisModule_CreateCommand(ctx,"modulecrash.xadd", cmd_crash,"write deny-oom random fast",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
cmd = RedisModule_GetCommand(ctx,"modulecrash.xadd");
if (RedisModule_SetCommandInfo(cmd, &info) == REDISMODULE_ERR)
return REDISMODULE_ERR;
/* Create a subcommand: modulecrash.parent sub
* It will crash in the command handler to verify we print subcommand name
* when hide-user-data-from-log config is enabled */
RedisModuleCommandInfo subcommand_info = {
.version = REDISMODULE_COMMAND_INFO_VERSION,
.arity = -5,
.key_specs = (RedisModuleCommandKeySpec[]){
{
.flags = REDISMODULE_CMD_KEY_RW | REDISMODULE_CMD_KEY_UPDATE,
.begin_search_type = REDISMODULE_KSPEC_BS_INDEX,
.bs.index.pos = 1,
.find_keys_type = REDISMODULE_KSPEC_FK_RANGE,
.fk.range = {0,1,0}
},
{0}
},
.args = (RedisModuleCommandArg[]){
{
.name = "key",
.type = REDISMODULE_ARG_TYPE_KEY,
.key_spec_index = 0
},
{
.name = "token",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "TOKEN",
.flags = REDISMODULE_CMD_ARG_OPTIONAL
},
{
.name = "data",
.type = REDISMODULE_ARG_TYPE_BLOCK,
.subargs = (RedisModuleCommandArg[]){
{
.name = "field",
.type = REDISMODULE_ARG_TYPE_STRING,
},
{
.name = "value",
.type = REDISMODULE_ARG_TYPE_STRING,
},
{0}
}
},
{0}
}
};
if (RedisModule_CreateCommand(ctx,"modulecrash.parent",NULL,"",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
RedisModuleCommand *parent = RedisModule_GetCommand(ctx,"modulecrash.parent");
if (RedisModule_CreateSubcommand(parent,"subcmd",cmd_crash,"",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
cmd = RedisModule_GetCommand(ctx,"modulecrash.parent|subcmd");
if (RedisModule_SetCommandInfo(cmd, &subcommand_info) == REDISMODULE_ERR)
return REDISMODULE_ERR;
/* Create modulecrash.zunion command which is similar to zunion command.
* It will crash in the command handler to verify we print command tokens
* when hide-user-data-from-log config is enabled */
RedisModuleCommandInfo zunioninfo = {
.version = REDISMODULE_COMMAND_INFO_VERSION,
.arity = -5,
.key_specs = (RedisModuleCommandKeySpec[]){
{
.flags = REDISMODULE_CMD_KEY_RO,
.begin_search_type = REDISMODULE_KSPEC_BS_INDEX,
.bs.index.pos = 1,
.find_keys_type = REDISMODULE_KSPEC_FK_KEYNUM,
.fk.keynum = {0,1,1}
},
{0}
},
.args = (RedisModuleCommandArg[]){
{
.name = "numkeys",
.type = REDISMODULE_ARG_TYPE_INTEGER,
},
{
.name = "key",
.type = REDISMODULE_ARG_TYPE_KEY,
.key_spec_index = 0,
.flags = REDISMODULE_CMD_ARG_MULTIPLE
},
{
.name = "weights",
.type = REDISMODULE_ARG_TYPE_INTEGER,
.token = "WEIGHTS",
.flags = REDISMODULE_CMD_ARG_OPTIONAL | REDISMODULE_CMD_ARG_MULTIPLE
},
{
.name = "aggregate",
.type = REDISMODULE_ARG_TYPE_ONEOF,
.token = "AGGREGATE",
.flags = REDISMODULE_CMD_ARG_OPTIONAL,
.subargs = (RedisModuleCommandArg[]){
{
.name = "sum",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "sum"
},
{
.name = "min",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "min"
},
{
.name = "max",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "max"
},
{0}
}
},
{
.name = "withscores",
.type = REDISMODULE_ARG_TYPE_PURE_TOKEN,
.token = "WITHSCORES",
.flags = REDISMODULE_CMD_ARG_OPTIONAL
},
{0}
}
};
if (RedisModule_CreateCommand(ctx,"modulecrash.zunion", cmd_crash,"readonly",0,0,0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
cmd = RedisModule_GetCommand(ctx,"modulecrash.zunion");
if (RedisModule_SetCommandInfo(cmd, &zunioninfo) == REDISMODULE_ERR)
return REDISMODULE_ERR;
return REDISMODULE_OK;
}

View File

@ -312,3 +312,12 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
return REDISMODULE_OK;
}
int RedisModule_OnUnload(RedisModuleCtx *ctx) {
REDISMODULE_NOT_USED(ctx);
if (datatype) {
RedisModule_Free(datatype);
datatype = NULL;
}
return REDISMODULE_OK;
}

View File

@ -161,13 +161,14 @@ size_t FragFreeEffort(RedisModuleString *key, const void *value) {
}
int FragDefrag(RedisModuleDefragCtx *ctx, RedisModuleString *key, void **value) {
REDISMODULE_NOT_USED(key);
unsigned long i = 0;
int steps = 0;
int dbid = RedisModule_GetDbIdFromDefragCtx(ctx);
RedisModule_Assert(dbid != -1);
RedisModule_Log(NULL, "notice", "Defrag key: %s", RedisModule_StringPtrLen(key, NULL));
/* Attempt to get cursor, validate it's what we're exepcting */
if (RedisModule_DefragCursorGet(ctx, &i) == REDISMODULE_OK) {
if (i > 0) datatype_resumes++;

View File

@ -117,6 +117,67 @@ int test_open_key_subexpired_hget(RedisModuleCtx *ctx, RedisModuleString **argv,
return REDISMODULE_OK;
}
int test_open_key_hget_expire(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
if (argc<3) {
RedisModule_WrongArity(ctx);
return REDISMODULE_OK;
}
RedisModuleKey *key = openKeyWithMode(ctx, argv[1], REDISMODULE_OPEN_KEY_ACCESS_EXPIRED);
if (!key) return REDISMODULE_OK;
mstime_t expireAt;
/* Let's test here that we get error if using invalid flags combination */
RedisModule_Assert(
RedisModule_HashGet(key,
REDISMODULE_HASH_EXISTS |
REDISMODULE_HASH_EXPIRE_TIME,
argv[2], &expireAt, NULL) == REDISMODULE_ERR);
/* Now let's get the expire time */
RedisModule_HashGet(key, REDISMODULE_HASH_EXPIRE_TIME,argv[2],&expireAt,NULL);
RedisModule_ReplyWithLongLong(ctx, expireAt);
RedisModule_CloseKey(key);
return REDISMODULE_OK;
}
/* Test variadic function to get two expiration times */
int test_open_key_hget_two_expire(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
if (argc<3) {
RedisModule_WrongArity(ctx);
return REDISMODULE_OK;
}
RedisModuleKey *key = openKeyWithMode(ctx, argv[1], REDISMODULE_OPEN_KEY_ACCESS_EXPIRED);
if (!key) return REDISMODULE_OK;
mstime_t expireAt1, expireAt2;
RedisModule_HashGet(key,REDISMODULE_HASH_EXPIRE_TIME,argv[2],&expireAt1,argv[3],&expireAt2,NULL);
/* return the two expire time */
RedisModule_ReplyWithArray(ctx, 2);
RedisModule_ReplyWithLongLong(ctx, expireAt1);
RedisModule_ReplyWithLongLong(ctx, expireAt2);
RedisModule_CloseKey(key);
return REDISMODULE_OK;
}
int test_open_key_hget_min_expire(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
if (argc!=2) {
RedisModule_WrongArity(ctx);
return REDISMODULE_OK;
}
RedisModuleKey *key = openKeyWithMode(ctx, argv[1], REDISMODULE_READ);
if (!key) return REDISMODULE_OK;
volatile mstime_t minExpire = RedisModule_HashFieldMinExpire(key);
RedisModule_ReplyWithLongLong(ctx, minExpire);
RedisModule_CloseKey(key);
return REDISMODULE_OK;
}
int numReplies;
void ScanCallback(RedisModuleKey *key, RedisModuleString *field, RedisModuleString *value, void *privdata) {
UNUSED(key);
@ -172,6 +233,12 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx, "hash.hscan_expired", test_open_key_access_expired_hscan,"", 0, 0, 0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx, "hash.hget_expire", test_open_key_hget_expire,"", 0, 0, 0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx, "hash.hget_two_expire", test_open_key_hget_two_expire,"", 0, 0, 0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
if (RedisModule_CreateCommand(ctx, "hash.hget_min_expire", test_open_key_hget_min_expire,"", 0, 0, 0) == REDISMODULE_ERR)
return REDISMODULE_ERR;
return REDISMODULE_OK;
}

View File

@ -1,11 +1,12 @@
#include "redismodule.h"
#include <strings.h>
int mutable_bool_val;
int mutable_bool_val, no_prefix_bool, no_prefix_bool2;
int immutable_bool_val;
long long longval;
long long memval;
long long longval, no_prefix_longval;
long long memval, no_prefix_memval;
RedisModuleString *strval = NULL;
int enumval;
RedisModuleString *strval2 = NULL;
int enumval, no_prefix_enumval;
int flagsval;
/* Series of get and set callbacks for each type of config, these rely on the privdata ptr
@ -103,6 +104,36 @@ int longlongApplyFunc(RedisModuleCtx *ctx, void *privdata, RedisModuleString **e
return REDISMODULE_OK;
}
RedisModuleString *getStringConfigUnprefix(const char *name, void *privdata) {
REDISMODULE_NOT_USED(name);
REDISMODULE_NOT_USED(privdata);
return strval2;
}
int setStringConfigUnprefix(const char *name, RedisModuleString *new, void *privdata, RedisModuleString **err) {
REDISMODULE_NOT_USED(name);
REDISMODULE_NOT_USED(err);
REDISMODULE_NOT_USED(privdata);
if (strval2) RedisModule_FreeString(NULL, strval2);
RedisModule_RetainString(NULL, new);
strval2 = new;
return REDISMODULE_OK;
}
int getEnumConfigUnprefix(const char *name, void *privdata) {
REDISMODULE_NOT_USED(name);
REDISMODULE_NOT_USED(privdata);
return no_prefix_enumval;
}
int setEnumConfigUnprefix(const char *name, int val, void *privdata, RedisModuleString **err) {
REDISMODULE_NOT_USED(name);
REDISMODULE_NOT_USED(err);
REDISMODULE_NOT_USED(privdata);
no_prefix_enumval = val;
return REDISMODULE_OK;
}
int registerBlockCheck(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) {
REDISMODULE_NOT_USED(argv);
REDISMODULE_NOT_USED(argc);
@ -168,6 +199,30 @@ int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc)
if (RedisModule_RegisterNumericConfig(ctx, "numeric", -1, REDISMODULE_CONFIG_DEFAULT, -5, 2000, getNumericConfigCommand, setNumericConfigCommand, longlongApplyFunc, &longval) == REDISMODULE_ERR) {
return REDISMODULE_ERR;
}
/*** unprefixed and aliased configuration ***/
if (RedisModule_RegisterBoolConfig(ctx, "unprefix-bool|unprefix-bool-alias", 1, REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED,
getBoolConfigCommand, setBoolConfigCommand, NULL, &no_prefix_bool) == REDISMODULE_ERR) {
return REDISMODULE_ERR;
}
if (RedisModule_RegisterBoolConfig(ctx, "unprefix-noalias-bool", 1, REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED,
getBoolConfigCommand, setBoolConfigCommand, NULL, &no_prefix_bool2) == REDISMODULE_ERR) {
return REDISMODULE_ERR;
}
if (RedisModule_RegisterNumericConfig(ctx, "unprefix.numeric|unprefix.numeric-alias", -1, REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED,
-5, 2000, getNumericConfigCommand, setNumericConfigCommand, NULL, &no_prefix_longval) == REDISMODULE_ERR) {
return REDISMODULE_ERR;
}
if (RedisModule_RegisterStringConfig(ctx, "unprefix-string|unprefix.string-alias", "secret unprefix", REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED,
getStringConfigUnprefix, setStringConfigUnprefix, NULL, NULL) == REDISMODULE_ERR) {
return REDISMODULE_ERR;
}
if (RedisModule_RegisterEnumConfig(ctx, "unprefix-enum|unprefix-enum-alias", 1, REDISMODULE_CONFIG_DEFAULT|REDISMODULE_CONFIG_UNPREFIXED,
enum_vals, int_vals, 5, getEnumConfigUnprefix, setEnumConfigUnprefix, NULL, NULL) == REDISMODULE_ERR) {
return REDISMODULE_ERR;
}
size_t len;
if (argc && !strcasecmp(RedisModule_StringPtrLen(argv[0], &len), "noload")) {
return REDISMODULE_OK;
@ -191,5 +246,9 @@ int RedisModule_OnUnload(RedisModuleCtx *ctx) {
RedisModule_FreeString(ctx, strval);
strval = NULL;
}
if (strval2) {
RedisModule_FreeString(ctx, strval2);
strval2 = NULL;
}
return REDISMODULE_OK;
}

View File

@ -373,6 +373,8 @@ proc run_external_server_test {code overrides} {
r flushall
r function flush
r script flush
r config resetstat
# store configs
set saved_config {}

View File

@ -1,3 +1,16 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
proc randstring {min max {type binary}} {
set len [expr {$min+int(rand()*($max-$min+1))}]
set output {}
@ -118,11 +131,11 @@ proc wait_for_sync r {
}
}
proc wait_replica_online r {
wait_for_condition 50 100 {
[string match "*slave0:*,state=online*" [$r info replication]]
proc wait_replica_online {r {replica_id 0} {maxtries 50} {delay 100}} {
wait_for_condition $maxtries $delay {
[string match "*slave$replica_id:*,state=online*" [$r info replication]]
} else {
fail "replica didn't online in time"
fail "replica $replica_id did not become online in time"
}
}
@ -565,10 +578,11 @@ proc find_valgrind_errors {stderr on_termination} {
}
# Execute a background process writing random data for the specified number
# of seconds to the specified Redis instance.
proc start_write_load {host port seconds} {
# of seconds to the specified Redis instance. If key is omitted, a random key
# is used for every SET command.
proc start_write_load {host port seconds {key ""}} {
set tclsh [info nameofexecutable]
exec $tclsh tests/helpers/gen_write_load.tcl $host $port $seconds $::tls &
exec $tclsh tests/helpers/gen_write_load.tcl $host $port $seconds $::tls $key &
}
# Stop a process generating write load executed with start_write_load.
@ -677,6 +691,12 @@ proc pause_process pid {
}
proc resume_process pid {
wait_for_condition 50 1000 {
[string match "T*" [exec ps -o state= -p $pid]]
} else {
puts [exec ps j $pid]
fail "process was not stopped"
}
exec kill -SIGCONT $pid
}
@ -698,6 +718,16 @@ proc latencyrstat_percentiles {cmd r} {
}
}
proc get_io_thread_clients {id {client r}} {
set pattern "io_thread_$id:clients=(\[0-9\]+)"
set info [$client info threads]
if {[regexp $pattern $info _ value]} {
return $value
} else {
return -1
}
}
proc generate_fuzzy_traffic_on_key {key type duration} {
# Commands per type, blocking commands removed
# TODO: extract these from COMMAND DOCS, and improve to include other types

View File

@ -116,6 +116,32 @@ start_server {tags {"acl external:skip"}} {
assert_match "*NOPERM*key*" $err
}
test {Validate read and write permissions format - empty permission} {
catch {r ACL SETUSER key-permission-RW %~} err
set err
} {ERR Error in ACL SETUSER modifier '%~': Syntax error}
test {Validate read and write permissions format - empty selector} {
catch {r ACL SETUSER key-permission-RW %} err
set err
} {ERR Error in ACL SETUSER modifier '%': Syntax error}
test {Validate read and write permissions format - empty pattern} {
# Empty pattern results with R/W access to no key
r ACL SETUSER key-permission-RW on nopass %RW~ +@all
$r2 auth key-permission-RW password
catch {$r2 SET x 5} err
set err
} {NOPERM No permissions to access a key}
test {Validate read and write permissions format - no pattern} {
# No pattern results with R/W access to no key (currently we accept this syntax error)
r ACL SETUSER key-permission-RW on nopass %RW +@all
$r2 auth key-permission-RW password
catch {$r2 SET x 5} err
set err
} {NOPERM No permissions to access a key}
test {Test separate read and write permissions on different selectors are not additive} {
r ACL SETUSER key-permission-RW-selector on nopass "(%R~read* +@all)" "(%W~write* +@all)"
$r2 auth key-permission-RW-selector password

View File

@ -1,3 +1,16 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
start_server {tags {"auth external:skip"}} {
test {AUTH fails if there is no password configured server side} {
catch {r auth foo} err
@ -65,12 +78,16 @@ start_server {tags {"auth_binary_password external:skip"}} {
set master_port [srv -1 port]
set slave [srv 0 client]
test {MASTERAUTH test with binary password} {
foreach rdbchannel {yes no} {
test "MASTERAUTH test with binary password rdbchannel=$rdbchannel" {
$slave slaveof no one
$master config set requirepass "abc\x00def"
$master config set repl-rdb-channel $rdbchannel
# Configure the replica with masterauth
set loglines [count_log_lines 0]
$slave config set masterauth "abc"
$slave config set repl-rdb-channel $rdbchannel
$slave slaveof $master_host $master_port
# Verify replica is not able to sync with master
@ -86,4 +103,5 @@ start_server {tags {"auth_binary_password external:skip"}} {
}
}
}
}
}

View File

@ -108,7 +108,11 @@ start_server {} {
$rr write [join [list "*1\r\n\$$maxmemory_clients_actual\r\n" [string repeat v $maxmemory_clients_actual]] ""]
$rr flush
} e
assert {![client_exists $cname]}
wait_for_condition 100 10 {
![client_exists $cname]
} else {
fail "Failed to evict client"
}
$rr close
# Restore settings
@ -360,6 +364,13 @@ start_server {} {
resume_process $server_pid
r ping ;# make sure a full event loop cycle is processed before issuing CLIENT LIST
# wait for get commands to be processed
wait_for_condition 100 10 {
[expr {[regexp {calls=(\d+)} [cmdrstat get r] -> calls] ? $calls : 0}] >= 2
} else {
fail "get did not arrive"
}
# Validate obuf-clients were disconnected (because of obuf limit)
catch {client_field obuf-client1 name} e
assert_match {no client named obuf-client1 found*} $e
@ -367,7 +378,9 @@ start_server {} {
assert_match {no client named obuf-client2 found*} $e
# Validate qbuf-client is still connected and wasn't evicted
if {[lindex [r config get io-threads] 1] == 1} {
assert_equal [client_field qbuf-client name] {qbuf-client}
}
$rr1 close
$rr2 close
@ -404,8 +417,11 @@ start_server {} {
# Decrease maxmemory_clients and expect client eviction
r config set maxmemory-clients [expr $maxmemory_clients / 2]
set connected_clients [llength [lsearch -all [split [string trim [r client list]] "\r\n"] *name=client*]]
assert {$connected_clients > 0 && $connected_clients < $client_count}
wait_for_condition 200 10 {
[llength [regexp -all -inline {name=client} [r client list]]] < $client_count
} else {
fail "Failed to evict clients"
}
foreach rr $rrs {$rr close}
}
@ -463,8 +479,11 @@ start_server {} {
assert {$total_client_mem <= $maxmemory_clients}
# Make sure we have only half of our clients now
set connected_clients [llength [lsearch -all [split [string trim [r client list]] "\r\n"] *name=client*]]
assert {$connected_clients == [expr $client_count / 2]}
wait_for_condition 200 100 {
[llength [regexp -all -inline {name=client} [r client list]]] == $client_count / 2
} else {
fail "Failed to evict clients"
}
# Restore the reply buffer resize to default
r debug replybuffer resizing 1
@ -519,7 +538,8 @@ start_server {} {
foreach size [lreverse $sizes] {
set control_mem [client_field control tot-mem]
set total_mem [expr $total_mem - $clients_per_size * $size]
r config set maxmemory-clients [expr $total_mem + $control_mem]
# allow some tolerance when using io threads
r config set maxmemory-clients [expr $total_mem + $control_mem + 1000]
set clients [split [string trim [r client list]] "\r\n"]
# Verify only relevant clients were evicted
for {set i 0} {$i < [llength $sizes]} {incr i} {

View File

@ -222,6 +222,46 @@ start_server {tags {"hll"}} {
assert_equal 3 [r pfcount destkey]
}
test {PFMERGE results with simd} {
r del hllscalar{t} hllsimd{t} hll1{t} hll2{t} hll3{t}
for {set x 1} {$x < 2000} {incr x} {
r pfadd hll1{t} [expr rand()]
}
for {set x 1} {$x < 4000} {incr x} {
r pfadd hll2{t} [expr rand()]
}
for {set x 1} {$x < 8000} {incr x} {
r pfadd hll3{t} [expr rand()]
}
assert {[r pfcount hll1{t}] > 0}
assert {[r pfcount hll2{t}] > 0}
assert {[r pfcount hll3{t}] > 0}
r pfdebug simd off
set scalar [r pfcount hll1{t} hll2{t} hll3{t}]
r pfdebug simd on
set simd [r pfcount hll1{t} hll2{t} hll3{t}]
assert {$scalar > 0}
assert {$simd > 0}
assert_equal $scalar $simd
r pfdebug simd off
r pfmerge hllscalar{t} hll1{t} hll2{t} hll3{t}
r pfdebug simd on
r pfmerge hllsimd{t} hll1{t} hll2{t} hll3{t}
set scalar [r pfcount hllscalar{t}]
set simd [r pfcount hllsimd{t}]
assert {$scalar > 0}
assert {$simd > 0}
assert_equal $scalar $simd
set scalar [r get hllscalar{t}]
set simd [r get hllsimd{t}]
assert_equal $scalar $simd
} {} {needs:pfdebug}
test {PFCOUNT multiple-keys merge returns cardinality of union #1} {
r del hll1{t} hll2{t} hll3{t}
for {set x 1} {$x < 10000} {incr x} {

View File

@ -0,0 +1,454 @@
################################################################################
# Test the "info keysizes" command.
# The command returns a histogram of the sizes of keys in the database.
################################################################################
# Query and Strip result of "info keysizes" from header, spaces, and newlines.
proc get_stripped_info {server} {
set infoStripped [string map {
"# Keysizes" ""
" " "" "\n" "" "\r" ""
} [$server info keysizes] ]
return $infoStripped
}
# Verify output of "info keysizes" command is as expected.
#
# Arguments:
# cmd - A command that should be run before the verification.
# expOutput - This is a string that represents the expected output abbreviated.
# Instead of the output of "strings_len_exp_distrib" write "STR".
# Similarly for LIST, SET, ZSET and HASH. Spaces and newlines are
# ignored.
# waitCond - If set to 1, the function wait_for_condition 50x50msec for the
# expOutput to match the actual output.
#
# (replicaMode) - Global variable that indicates if the test is running in replica
# mode. If so, run the command on leader, verify the output. Then wait
# for the replica to catch up and verify the output on the replica
# as well. Otherwise, just run the command on the leader and verify
# the output.
proc run_cmd_verify_hist {cmd expOutput {waitCond 0} } {
uplevel 1 $cmd
global replicaMode
# ref the leader with `server` variable
if {$replicaMode eq 1} { set server [srv -1 client] } else { set server [srv 0 client] }
# Replace all placeholders with the actual values. Remove spaces & newlines.
set expStripped [string map {
"STR" "distrib_strings_sizes"
"LIST" "distrib_lists_items"
"SET" "distrib_sets_items"
"ZSET" "distrib_zsets_items"
"HASH" "distrib_hashes_items"
" " "" "\n" "" "\r" ""
} $expOutput]
if {$waitCond} {
wait_for_condition 50 50 {
$expStripped eq [get_stripped_info $server]
} else {
fail "Unexpected KEYSIZES. Expected: `$expStripped` \
but got: `[get_stripped_info $server]`. Failed after command: $cmd"
}
} else {
set infoStripped [get_stripped_info $server]
if {$expStripped ne $infoStripped} {
fail "Unexpected KEYSIZES. Expected: `$expStripped` \
but got: `$infoStripped`. Failed after command: $cmd"
}
}
# If we are testing `replicaMode` then need to wait for the replica to catch up
if {$replicaMode eq 1} {
wait_for_condition 50 50 {
$expStripped eq [get_stripped_info $server]
} else {
fail "Unexpected replica KEYSIZES. Expected: `$expStripped` \
but got: `[get_stripped_info $server]`. Failed after command: $cmd"
}
}
}
proc test_all_keysizes { {replMode 0} } {
# If in replica mode then update global var `replicaMode` so function
# `run_cmd_verify_hist` knows to run the command on the leader and then
# wait for the replica to catch up.
global replicaMode
set replicaMode $replMode
# ref the leader with `server` variable
if {$replicaMode eq 1} {
set server [srv -1 client]
set suffixRepl "(replica)"
} else {
set server [srv 0 client]
set suffixRepl ""
}
test "KEYSIZES - Test i'th bin counts keysizes between (2^i) and (2^(i+1)-1) as expected $suffixRepl" {
set base_string ""
run_cmd_verify_hist {$server FLUSHALL} {}
for {set i 1} {$i <= 10} {incr i} {
append base_string "x"
set log_value [expr {1 << int(log($i) / log(2))}]
#puts "Iteration $i: $base_string (Log base 2 pattern: $log_value)"
run_cmd_verify_hist {$server set mykey $base_string} "db0_STR:$log_value=1"
}
}
test "KEYSIZES - Histogram of values of Bytes, Kilo and Mega $suffixRepl" {
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server set x 0123456789ABCDEF} {db0_STR:16=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:32=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:64=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:128=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:256=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:512=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:1K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:2K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:4K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:8K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:16K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:32K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:64K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:128K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:256K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:512K=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:1M=1}
run_cmd_verify_hist {$server APPEND x [$server get x]} {db0_STR:2M=1}
}
test "KEYSIZES - Test List $suffixRepl" {
# FLUSHALL
run_cmd_verify_hist {$server FLUSHALL} {}
# RPUSH
run_cmd_verify_hist {$server RPUSH l1 1 2 3 4 5} {db0_LIST:4=1}
run_cmd_verify_hist {$server RPUSH l1 6 7 8 9} {db0_LIST:8=1}
# Test also LPUSH, RPUSH, LPUSHX, RPUSHX
run_cmd_verify_hist {$server LPUSH l2 1} {db0_LIST:1=1,8=1}
run_cmd_verify_hist {$server LPUSH l2 2} {db0_LIST:2=1,8=1}
run_cmd_verify_hist {$server LPUSHX l2 3} {db0_LIST:2=1,8=1}
run_cmd_verify_hist {$server RPUSHX l2 4} {db0_LIST:4=1,8=1}
# RPOP
run_cmd_verify_hist {$server RPOP l1} {db0_LIST:4=1,8=1}
run_cmd_verify_hist {$server RPOP l1} {db0_LIST:4=2}
# DEL
run_cmd_verify_hist {$server DEL l1} {db0_LIST:4=1}
# LINSERT, LTRIM
run_cmd_verify_hist {$server RPUSH l3 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14} {db0_LIST:4=1,8=1}
run_cmd_verify_hist {$server LINSERT l3 AFTER 9 10} {db0_LIST:4=1,16=1}
run_cmd_verify_hist {$server LTRIM l3 0 8} {db0_LIST:4=1,8=1}
# DEL
run_cmd_verify_hist {$server DEL l3} {db0_LIST:4=1}
run_cmd_verify_hist {$server DEL l2} {}
# LMOVE, BLMOVE
run_cmd_verify_hist {$server RPUSH l4 1 2 3 4 5 6 7 8} {db0_LIST:8=1}
run_cmd_verify_hist {$server LMOVE l4 l5 LEFT LEFT} {db0_LIST:1=1,4=1}
run_cmd_verify_hist {$server LMOVE l4 l5 RIGHT RIGHT} {db0_LIST:2=1,4=1}
run_cmd_verify_hist {$server LMOVE l4 l5 LEFT RIGHT} {db0_LIST:2=1,4=1}
run_cmd_verify_hist {$server LMOVE l4 l5 RIGHT LEFT} {db0_LIST:4=2}
run_cmd_verify_hist {$server BLMOVE l4 l5 RIGHT LEFT 0} {db0_LIST:2=1,4=1}
# DEL
run_cmd_verify_hist {$server DEL l4} {db0_LIST:4=1}
run_cmd_verify_hist {$server DEL l5} {}
# LMPOP
run_cmd_verify_hist {$server RPUSH l6 1 2 3 4 5 6 7 8 9 10} {db0_LIST:8=1}
run_cmd_verify_hist {$server LMPOP 1 l6 LEFT COUNT 2} {db0_LIST:8=1}
run_cmd_verify_hist {$server LMPOP 1 l6 LEFT COUNT 1} {db0_LIST:4=1}
run_cmd_verify_hist {$server LMPOP 1 l6 LEFT COUNT 6} {db0_LIST:1=1}
# LPOP
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l7 1 2 3 4} {db0_LIST:4=1}
run_cmd_verify_hist {$server LPOP l7} {db0_LIST:2=1}
# LREM
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l8 1 x 3 x 5 x 7 x 9 10} {db0_LIST:8=1}
run_cmd_verify_hist {$server LREM l8 3 x} {db0_LIST:4=1}
# EXPIRE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l9 1 2 3 4} {db0_LIST:4=1}
run_cmd_verify_hist {$server PEXPIRE l9 50} {db0_LIST:4=1}
run_cmd_verify_hist {} {} 1
# SET overwrites
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l9 1 2 3 4} {db0_LIST:4=1}
run_cmd_verify_hist {$server SET l9 1234567} {db0_STR:4=1}
run_cmd_verify_hist {$server DEL l9} {}
} {} {cluster:skip}
test "KEYSIZES - Test SET $suffixRepl" {
run_cmd_verify_hist {$server FLUSHALL} {}
# SADD
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5} {db0_SET:4=1}
run_cmd_verify_hist {$server SADD s1 6 7 8} {db0_SET:8=1}
# Test also SADD, SREM, SMOVE, SPOP
run_cmd_verify_hist {$server SADD s2 1} {db0_SET:1=1,8=1}
run_cmd_verify_hist {$server SADD s2 2} {db0_SET:2=1,8=1}
run_cmd_verify_hist {$server SREM s2 3} {db0_SET:2=1,8=1}
run_cmd_verify_hist {$server SMOVE s2 s3 2} {db0_SET:1=2,8=1}
run_cmd_verify_hist {$server SPOP s3} {db0_SET:1=1,8=1}
run_cmd_verify_hist {$server SPOP s2} {db0_SET:8=1}
run_cmd_verify_hist {$server SPOP s1} {db0_SET:4=1}
run_cmd_verify_hist {$server del s1} {}
# SDIFFSTORE
run_cmd_verify_hist {$server flushall} {}
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5 6 7 8} {db0_SET:8=1}
run_cmd_verify_hist {$server SADD s2 6 7 8 9 A B C D} {db0_SET:8=2}
run_cmd_verify_hist {$server SDIFFSTORE s3 s1 s2} {db0_SET:4=1,8=2}
#SINTERSTORE
run_cmd_verify_hist {$server flushall} {}
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5 6 7 8} {db0_SET:8=1}
run_cmd_verify_hist {$server SADD s2 6 7 8 9 A B C D} {db0_SET:8=2}
run_cmd_verify_hist {$server SINTERSTORE s3 s1 s2} {db0_SET:2=1,8=2}
#SUNIONSTORE
run_cmd_verify_hist {$server flushall} {}
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5 6 7 8} {db0_SET:8=1}
run_cmd_verify_hist {$server SADD s2 6 7 8 9 A B C D} {db0_SET:8=2}
run_cmd_verify_hist {$server SUNIONSTORE s3 s1 s2} {db0_SET:8=3}
run_cmd_verify_hist {$server SADD s4 E F G H} {db0_SET:4=1,8=3}
run_cmd_verify_hist {$server SUNIONSTORE s5 s3 s4} {db0_SET:4=1,8=3,16=1}
# DEL
run_cmd_verify_hist {$server flushall} {}
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5 6 7 8} {db0_SET:8=1}
run_cmd_verify_hist {$server DEL s1} {}
# EXPIRE
run_cmd_verify_hist {$server flushall} {}
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5 6 7 8} {db0_SET:8=1}
run_cmd_verify_hist {$server PEXPIRE s1 50} {db0_SET:8=1}
run_cmd_verify_hist {} {} 1
# SET overwrites
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5 6 7 8} {db0_SET:8=1}
run_cmd_verify_hist {$server SET s1 1234567} {db0_STR:4=1}
run_cmd_verify_hist {$server DEL s1} {}
} {} {cluster:skip}
test "KEYSIZES - Test ZSET $suffixRepl" {
# ZADD, ZREM
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZADD z1 6 f 7 g 8 h 9 i} {db0_ZSET:8=1}
run_cmd_verify_hist {$server ZADD z2 1 a} {db0_ZSET:1=1,8=1}
run_cmd_verify_hist {$server ZREM z1 a} {db0_ZSET:1=1,8=1}
run_cmd_verify_hist {$server ZREM z1 b} {db0_ZSET:1=1,4=1}
# ZREMRANGEBYSCORE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZREMRANGEBYSCORE z1 -inf (2} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZREMRANGEBYSCORE z1 -inf (3} {db0_ZSET:2=1}
# ZREMRANGEBYRANK
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e 6 f} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZREMRANGEBYRANK z1 0 1} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZREMRANGEBYRANK z1 0 0} {db0_ZSET:2=1}
# ZREMRANGEBYLEX
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 0 a 0 b 0 c 0 d 0 e 0 f} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZREMRANGEBYLEX z1 - (d} {db0_ZSET:2=1}
# ZUNIONSTORE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZADD z2 6 f 7 g 8 h 9 i} {db0_ZSET:4=2}
run_cmd_verify_hist {$server ZUNIONSTORE z3 2 z1 z2} {db0_ZSET:4=2,8=1}
# ZINTERSTORE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZADD z2 3 c 4 d 5 e 6 f} {db0_ZSET:4=2}
run_cmd_verify_hist {$server ZINTERSTORE z3 2 z1 z2} {db0_ZSET:2=1,4=2}
# BZPOPMIN, BZPOPMAX
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server BZPOPMIN z1 0} {db0_ZSET:4=1}
run_cmd_verify_hist {$server BZPOPMAX z1 0} {db0_ZSET:2=1}
# ZDIFFSTORE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZADD z2 3 c 4 d 5 e 6 f} {db0_ZSET:4=2}
run_cmd_verify_hist {$server ZDIFFSTORE z3 2 z1 z2} {db0_ZSET:2=1,4=2}
# ZINTERSTORE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server ZADD z2 3 c 4 d 5 e 6 f} {db0_ZSET:4=2}
run_cmd_verify_hist {$server ZINTERSTORE z3 2 z1 z2} {db0_ZSET:2=1,4=2}
# DEL
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server DEL z1} {}
# EXPIRE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server PEXPIRE z1 50} {db0_ZSET:4=1}
run_cmd_verify_hist {} {} 1
# SET overwrites
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c 4 d 5 e} {db0_ZSET:4=1}
run_cmd_verify_hist {$server SET z1 1234567} {db0_STR:4=1}
run_cmd_verify_hist {$server DEL z1} {}
} {} {cluster:skip}
test "KEYSIZES - Test STRING $suffixRepl" {
# SETRANGE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server SET s2 1234567890} {db0_STR:8=1}
run_cmd_verify_hist {$server SETRANGE s2 10 123456} {db0_STR:16=1}
# MSET, MSETNX
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server MSET s3 1 s4 2 s5 3} {db0_STR:1=3}
run_cmd_verify_hist {$server MSETNX s6 1 s7 2 s8 3} {db0_STR:1=6}
# DEL
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server SET s9 1234567890} {db0_STR:8=1}
run_cmd_verify_hist {$server DEL s9} {}
#EXPIRE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server SET s10 1234567890} {db0_STR:8=1}
run_cmd_verify_hist {$server PEXPIRE s10 50} {db0_STR:8=1}
run_cmd_verify_hist {} {} 1
# SET (+overwrite)
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server SET s1 1024} {db0_STR:4=1}
run_cmd_verify_hist {$server SET s1 842} {db0_STR:2=1}
run_cmd_verify_hist {$server SET s1 2} {db0_STR:1=1}
run_cmd_verify_hist {$server SET s1 1234567} {db0_STR:4=1}
} {} {cluster:skip}
foreach type {listpackex hashtable} {
# Test different implementations of hash tables and listpacks
if {$type eq "hashtable"} {
$server config set hash-max-listpack-entries 0
} else {
$server config set hash-max-listpack-entries 512
}
test "KEYSIZES - Test HASH ($type) $suffixRepl" {
# HSETNX
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server HSETNX h1 1 1} {db0_HASH:1=1}
run_cmd_verify_hist {$server HSETNX h1 2 2} {db0_HASH:2=1}
# HSET, HDEL
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server HSET h2 1 1} {db0_HASH:1=1}
run_cmd_verify_hist {$server HSET h2 2 2} {db0_HASH:2=1}
run_cmd_verify_hist {$server HDEL h2 1} {db0_HASH:1=1}
run_cmd_verify_hist {$server HDEL h2 2} {}
# HMSET
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server HMSET h1 1 1 2 2 3 3} {db0_HASH:2=1}
run_cmd_verify_hist {$server HMSET h1 1 1 2 2 3 3} {db0_HASH:2=1}
run_cmd_verify_hist {$server HMSET h1 1 1 2 2 3 3 4 4} {db0_HASH:4=1}
# HINCRBY
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server hincrby h1 f1 10} {db0_HASH:1=1}
run_cmd_verify_hist {$server hincrby h1 f1 10} {db0_HASH:1=1}
run_cmd_verify_hist {$server hincrby h1 f2 20} {db0_HASH:2=1}
# HINCRBYFLOAT
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server hincrbyfloat h1 f1 10.5} {db0_HASH:1=1}
run_cmd_verify_hist {$server hincrbyfloat h1 f1 10.5} {db0_HASH:1=1}
run_cmd_verify_hist {$server hincrbyfloat h1 f2 10.5} {db0_HASH:2=1}
# HEXPIRE
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server HSET h1 f1 1} {db0_HASH:1=1}
run_cmd_verify_hist {$server HSET h1 f2 1} {db0_HASH:2=1}
run_cmd_verify_hist {$server HPEXPIREAT h1 1 FIELDS 1 f1} {db0_HASH:1=1}
run_cmd_verify_hist {$server HSET h1 f3 1} {db0_HASH:2=1}
run_cmd_verify_hist {$server HPEXPIRE h1 50 FIELDS 1 f2} {db0_HASH:2=1}
run_cmd_verify_hist {} {db0_HASH:1=1} 1
run_cmd_verify_hist {$server HPEXPIRE h1 50 FIELDS 1 f3} {db0_HASH:1=1}
run_cmd_verify_hist {} {} 1
}
}
test "KEYSIZES - Test STRING BITS $suffixRepl" {
# BITOPS
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server SET b1 "x123456789"} {db0_STR:8=1}
run_cmd_verify_hist {$server SET b2 "x12345678"} {db0_STR:8=2}
run_cmd_verify_hist {$server BITOP AND b3 b1 b2} {db0_STR:8=3}
run_cmd_verify_hist {$server BITOP OR b4 b1 b2} {db0_STR:8=4}
run_cmd_verify_hist {$server BITOP XOR b5 b1 b2} {db0_STR:8=5}
# SETBIT
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server setbit b1 71 1} {db0_STR:8=1}
run_cmd_verify_hist {$server setbit b1 72 1} {db0_STR:8=1}
run_cmd_verify_hist {$server setbit b2 72 1} {db0_STR:8=2}
run_cmd_verify_hist {$server setbit b2 640 0} {db0_STR:8=1,64=1}
# BITFIELD
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server bitfield b3 set u8 6 255} {db0_STR:2=1}
run_cmd_verify_hist {$server bitfield b3 set u8 65 255} {db0_STR:8=1}
run_cmd_verify_hist {$server bitfield b4 set u8 1000 255} {db0_STR:8=1,64=1}
} {} {cluster:skip}
test "KEYSIZES - Test RESTORE $suffixRepl" {
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l10 1 2 3 4} {db0_LIST:4=1}
set encoded [$server dump l10]
run_cmd_verify_hist {$server del l10} {}
run_cmd_verify_hist {$server restore l11 0 $encoded} {db0_LIST:4=1}
}
test "KEYSIZES - Test RENAME $suffixRepl" {
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l12 1 2 3 4} {db0_LIST:4=1}
run_cmd_verify_hist {$server RENAME l12 l13} {db0_LIST:4=1}
} {} {cluster:skip}
test "KEYSIZES - Test MOVE $suffixRepl" {
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l1 1 2 3 4} {db0_LIST:4=1}
run_cmd_verify_hist {$server RPUSH l2 1} {db0_LIST:1=1,4=1}
run_cmd_verify_hist {$server MOVE l1 1} {db0_LIST:1=1 db1_LIST:4=1}
} {} {cluster:skip}
test "KEYSIZES - Test SWAPDB $suffixRepl" {
run_cmd_verify_hist {$server FLUSHALL} {}
run_cmd_verify_hist {$server RPUSH l1 1 2 3 4} {db0_LIST:4=1}
$server select 1
run_cmd_verify_hist {$server ZADD z1 1 A} {db0_LIST:4=1 db1_ZSET:1=1}
run_cmd_verify_hist {$server SWAPDB 0 1} {db0_ZSET:1=1 db1_LIST:4=1}
$server select 0
} {OK} {singledb:skip}
test "KEYSIZES - Test RDB $suffixRepl" {
run_cmd_verify_hist {$server FLUSHALL} {}
# Write list, set and zset to db0
run_cmd_verify_hist {$server RPUSH l1 1 2 3 4} {db0_LIST:4=1}
run_cmd_verify_hist {$server SADD s1 1 2 3 4 5} {db0_LIST:4=1 db0_SET:4=1}
run_cmd_verify_hist {$server ZADD z1 1 a 2 b 3 c} {db0_LIST:4=1 db0_SET:4=1 db0_ZSET:2=1}
run_cmd_verify_hist {$server SAVE} {db0_LIST:4=1 db0_SET:4=1 db0_ZSET:2=1}
if {$replicaMode eq 1} {
run_cmd_verify_hist {restart_server -1 true false} {db0_LIST:4=1 db0_SET:4=1 db0_ZSET:2=1}
} else {
run_cmd_verify_hist {restart_server 0 true false} {db0_LIST:4=1 db0_SET:4=1 db0_ZSET:2=1}
}
} {} {external:skip}
}
start_server {} {
# Test KEYSIZES on a single server
r select 0
test_all_keysizes 0
# Start another server to test replication of KEYSIZES
start_server {tags {needs:repl external:skip}} {
# Set the outer layer server as primary
set primary [srv -1 client]
set primary_host [srv -1 host]
set primary_port [srv -1 port]
# Set this inner layer server as replica
set replica [srv 0 client]
# Server should have role replica
$replica replicaof $primary_host $primary_port
wait_for_condition 50 100 { [s 0 role] eq {slave} } else { fail "Replication not started." }
# Test KEYSIZES on leader and replica
$primary select 0
test_all_keysizes 1
}
}

View File

@ -1,3 +1,16 @@
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Copyright (c) 2024-present, Valkey contributors.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
#
# Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
#
proc cmdstat {cmd} {
return [cmdrstat $cmd r]
}
@ -313,7 +326,7 @@ start_server {tags {"info" "external:skip"}} {
assert_lessthan $cycle2 [expr $cycle1+10] ;# we expect 2 or 3 cycles here, but allow some tolerance
if {$::verbose} { puts "eventloop metrics el_sum1: $el_sum1, el_sum2: $el_sum2" }
assert_morethan $el_sum2 $el_sum1
assert_lessthan $el_sum2 [expr $el_sum1+30000] ;# we expect roughly 100ms here, but allow some tolerance
assert_lessthan $el_sum2 [expr $el_sum1+100000] ;# we expect roughly 100ms here, but allow some tolerance
if {$::verbose} { puts "eventloop metrics cmd_sum1: $cmd_sum1, cmd_sum2: $cmd_sum2" }
assert_morethan $cmd_sum2 $cmd_sum1
assert_lessthan $cmd_sum2 [expr $cmd_sum1+15000] ;# we expect about tens of ms here, but allow some tolerance
@ -386,10 +399,10 @@ start_server {tags {"info" "external:skip"}} {
r config set client-output-buffer-limit "normal 10 0 0"
r set key [string repeat a 100000] ;# to trigger output buffer limit check this needs to be big
catch {r get key}
r config set client-output-buffer-limit $org_outbuf_limit
set info [r info stats]
assert_equal [getInfoProperty $info client_output_buffer_limit_disconnections] {1}
r config set client-output-buffer-limit $org_outbuf_limit
} {OK} {logreqres:skip} ;# same as obuf-limits.tcl, skip logreqres
} {} {logreqres:skip} ;# same as obuf-limits.tcl, skip logreqres
test {clients: pubsub clients} {
set info [r info clients]

View File

@ -6,8 +6,13 @@ start_server {tags {"introspection"}} {
}
test {CLIENT LIST} {
r client list
} {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|list user=* redir=-1 resp=*}
set client_list [r client list]
if {[lindex [r config get io-threads] 1] == 1} {
assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|list user=* redir=-1 resp=*} $client_list
} else {
assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=0 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|list user=* redir=-1 resp=*} $client_list
}
}
test {CLIENT LIST with IDs} {
set myid [r client id]
@ -16,8 +21,13 @@ start_server {tags {"introspection"}} {
}
test {CLIENT INFO} {
r client info
} {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|info user=* redir=-1 resp=*}
set client [r client info]
if {[lindex [r config get io-threads] 1] == 1} {
assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=26 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|info user=* redir=-1 resp=*} $client
} else {
assert_match {id=* addr=*:* laddr=*:* fd=* name=* age=* idle=* flags=N db=* sub=0 psub=0 ssub=0 multi=-1 watch=0 qbuf=0 qbuf-free=* argv-mem=* multi-mem=0 rbs=* rbp=* obl=0 oll=0 omem=0 tot-mem=* events=r cmd=client|info user=* redir=-1 resp=*} $client
}
}
test {CLIENT KILL with illegal arguments} {
assert_error "ERR wrong number of arguments for 'client|kill' command" {r client kill}
@ -86,6 +96,11 @@ start_server {tags {"introspection"}} {
assert {$connected_clients >= 3}
set res [r client kill skipme yes]
assert {$res == $connected_clients - 1}
wait_for_condition 1000 10 {
[s connected_clients] eq 1
} else {
fail "Can't kill all clients except the current one"
}
# Kill all clients, including `me`
set rd3 [redis_deferring_client]
@ -304,6 +319,9 @@ start_server {tags {"introspection"}} {
$rd read ; # Discard the OK
$bc blpop mylist 0
# make sure the blpop arrives first
$bc flush
after 100
wait_for_blocked_clients_count 1
r lpush mylist 1
wait_for_blocked_clients_count 0
@ -904,3 +922,62 @@ test {CONFIG REWRITE handles alias config properly} {
assert_equal [r config get hash-max-listpack-entries] {hash-max-listpack-entries 100}
}
} {} {external:skip}
test {IO threads client number} {
start_server {overrides {io-threads 2} tags {external:skip}} {
set iothread_clients [get_io_thread_clients 1]
assert_equal $iothread_clients [s connected_clients]
assert_equal [get_io_thread_clients 0] 0
r script debug yes ; # Transfer to main thread
assert_equal [get_io_thread_clients 0] 1
assert_equal [get_io_thread_clients 1] [expr $iothread_clients - 1]
set iothread_clients [get_io_thread_clients 1]
set rd1 [redis_deferring_client]
set rd2 [redis_deferring_client]
assert_equal [get_io_thread_clients 1] [expr $iothread_clients + 2]
$rd1 close
$rd2 close
wait_for_condition 1000 10 {
[get_io_thread_clients 1] eq $iothread_clients
} else {
fail "Fail to close clients of io thread 1"
}
assert_equal [get_io_thread_clients 0] 1
r script debug no ; # Transfer to io thread
assert_equal [get_io_thread_clients 0] 0
assert_equal [get_io_thread_clients 1] [expr $iothread_clients + 1]
}
}
test {Clients are evenly distributed among io threads} {
start_server {overrides {io-threads 4} tags {external:skip}} {
set cur_clients [s connected_clients]
assert_equal $cur_clients 1
global rdclients
for {set i 1} {$i < 9} {incr i} {
set rdclients($i) [redis_deferring_client]
}
for {set i 1} {$i <= 3} {incr i} {
assert_equal [get_io_thread_clients $i] 3
}
$rdclients(3) close
$rdclients(4) close
wait_for_condition 1000 10 {
[get_io_thread_clients 1] eq 2 &&
[get_io_thread_clients 2] eq 2 &&
[get_io_thread_clients 3] eq 3
} else {
fail "Fail to close clients"
}
set $rdclients(3) [redis_deferring_client]
set $rdclients(4) [redis_deferring_client]
for {set i 1} {$i <= 3} {incr i} {
assert_equal [get_io_thread_clients $i] 3
}
}
}

View File

@ -29,7 +29,11 @@ start_server {tags {"maxmemory" "external:skip"}} {
set dbsize [r dbsize]
if $client_eviction {
if {[lindex [r config get io-threads] 1] == 1} {
return [expr $evicted_clients > 0 && $evicted_keys == 0 && $dbsize == 50]
} else {
return [expr $evicted_clients >= 0 && $evicted_keys >= 0 && $dbsize <= 50]
}
} else {
return [expr $evicted_clients == 0 && $evicted_keys > 0 && $dbsize < 50]
}

Some files were not shown because too many files have changed in this diff Show More