Use hashtable as the default type of temp set object during sunion/sdiff (#13567)

This PR is based on https://github.com/valkey-io/valkey/pull/996


Currently, for operations like SUNION or SDIFF, the temporary set object can
be an intset or a listpack. Search operations are costly for these encodings.
This patch tries to use a hash table as the default encoding of the temporary
set object. It also tries to determine the correct encoding for the temporary
set object up front, to reduce unnecessary conversions.

This change is expected to give a performance boost for tests like:
- [memtier_benchmark-2keys-set-10-100-elements-sdiff](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-2keys-set-10-100-elements-sdiff.yml)
  66.2% IMPROVEMENT
- [memtier_benchmark-2keys-set-10-100-elements-sunion](https://github.com/redis/redis-benchmarks-specification/blob/main/redis_benchmarks_specification/test-suites/memtier_benchmark-2keys-set-10-100-elements-sunion.yml)
  126.5% IMPROVEMENT

-------
Co-authored-by: Lipeng Zhu <lipeng.zhu@intel.com>
Co-authored-by: Wangyang Guo <wangyang.guo@intel.com>

Co-authored-by: Lipeng Zhu <lipeng.zhu@intel.com>
Co-authored-by: Wangyang Guo <wangyang.guo@intel.com>
This commit is contained in:
Ozan Tezcan 2024-09-25 12:41:17 +03:00 committed by GitHub
parent 26ef28467a
commit 99c40ab53d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed files with 28 additions and 1 deletions

View File

@ -2,8 +2,13 @@
* Copyright (c) 2009-Present, Redis Ltd. * Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved. * All rights reserved.
* *
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0 * Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1). * (RSALv2) or the Server Side Public License v1 (SSPLv1).
*
* Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
*/ */
#include "server.h" #include "server.h"
@ -1492,6 +1497,7 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
robj **sets = zmalloc(sizeof(robj*)*setnum); robj **sets = zmalloc(sizeof(robj*)*setnum);
setTypeIterator *si; setTypeIterator *si;
robj *dstset = NULL; robj *dstset = NULL;
int dstset_encoding = OBJ_ENCODING_INTSET;
char *str; char *str;
size_t len; size_t len;
int64_t llval; int64_t llval;
@ -1510,6 +1516,23 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
zfree(sets); zfree(sets);
return; return;
} }
/* According to the factory method setTypeCreate(), a SET currently has one of 3 encodings:
* 1. OBJ_ENCODING_INTSET
* 2. OBJ_ENCODING_LISTPACK
* 3. OBJ_ENCODING_HT
* 'dstset_encoding' determines which encoding to use when initializing 'dstset'.
*
* If all sets are OBJ_ENCODING_INTSET encoded, or 'dstkey' is not NULL, keep 'dstset'
* OBJ_ENCODING_INTSET encoded at initialization. Otherwise, it would be inefficient to create
* 'dstset' as an intset and then convert it to a listpack or hashtable.
*
* If any of the sets is OBJ_ENCODING_LISTPACK or OBJ_ENCODING_HT encoded (and 'dstkey' is NULL),
* initialize 'dstset' with the default hashtable encoding: a hashtable is more efficient than a
* listpack for lookups and comparisons. The corresponding time complexities are O(1) vs O(n). */
if (!dstkey && dstset_encoding == OBJ_ENCODING_INTSET &&
(setobj->encoding == OBJ_ENCODING_LISTPACK || setobj->encoding == OBJ_ENCODING_HT)) {
dstset_encoding = OBJ_ENCODING_HT;
}
sets[j] = setobj; sets[j] = setobj;
if (j > 0 && sets[0] == sets[j]) { if (j > 0 && sets[0] == sets[j]) {
sameset = 1; sameset = 1;
@ -1552,7 +1575,11 @@ void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
/* We need a temp set object to store our union/diff. If the dstkey /* We need a temp set object to store our union/diff. If the dstkey
* is not NULL (that is, we are inside an SUNIONSTORE/SDIFFSTORE operation) then * is not NULL (that is, we are inside an SUNIONSTORE/SDIFFSTORE operation) then
* this set object will be the resulting object to set into the target key*/ * this set object will be the resulting object to set into the target key*/
dstset = createIntsetObject(); if (dstset_encoding == OBJ_ENCODING_INTSET) {
dstset = createIntsetObject();
} else {
dstset = createSetObject();
}
if (op == SET_OP_UNION) { if (op == SET_OP_UNION) {
/* Union is trivial, just add every element of every set to the /* Union is trivial, just add every element of every set to the