Optimize SCAN with MATCH when pattern implies cluster slot (#12536)

Optimize the performance of SCAN commands when the MATCH pattern can only match keys from a
single slot in cluster mode. This happens when the pattern contains a hash tag before any
wildcard matcher, or when the pattern contains no wildcard matchers at all.
This commit is contained in:
Viktor Söderqvist 2023-11-01 08:06:49 +01:00 committed by GitHub
parent e9f312e087
commit 8878817d89
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 86 additions and 5 deletions

View File

@ -1367,6 +1367,36 @@ unsigned int keyHashSlot(char *key, int keylen) {
return crc16(key+s+1,e-s-1) & 0x3FFF;
}
/* Infer the single cluster slot that a glob-style pattern (as implemented by
 * stringmatchlen() in util.c) is able to match, when such a slot exists.
 *
 * 'pattern' points to the pattern bytes and 'length' is the number of bytes
 * to examine.
 *
 * Returns the slot (a value masked with 0x3FFF) when every key the pattern
 * can match is known to hash to that slot, or -1 when that cannot be
 * inferred. */
int patternHashSlot(char *pattern, int length) {
    /* Position of the first '{' seen so far. -1 means none has been seen yet,
     * -2 means an empty tag "{}" was found and braces no longer matter. */
    int tag_start = -1;

    for (int pos = 0; pos < length; pos++) {
        switch (pattern[pos]) {
        case '*':
        case '?':
        case '[':
            /* A wildcard or character class appears before any complete
             * hash tag: matching keys may live in any slot. */
            return -1;
        case '\\':
            /* Escaped character. Working out the slot would require
             * unescaping into a temporary buffer, which is not implemented. */
            return -1;
        case '{':
            /* Only the first opening brace can start the hash tag. */
            if (tag_start == -1) {
                tag_start = pos;
            }
            break;
        case '}':
            if (tag_start < 0) {
                /* No tag is open (or tags are disabled): plain character. */
                break;
            }
            if (pos == tag_start + 1) {
                /* Empty tag "{}": the whole key is hashed, so stop looking
                 * at braces from here on. */
                tag_start = -2;
                break;
            }
            /* Non-empty tag "{...}": hash only what is between the braces. */
            return crc16(pattern + tag_start + 1, pos - tag_start - 1) & 0x3FFF;
        default:
            break;
        }
    }

    /* No wildcard and no usable hash tag: the pattern names exactly one key,
     * so the whole pattern is hashed. */
    return crc16(pattern, length) & 0x3FFF;
}
/* -----------------------------------------------------------------------------
* CLUSTER node API
* -------------------------------------------------------------------------- */

View File

@ -403,6 +403,7 @@ unsigned long getClusterConnectionsCount(void);
int clusterSendModuleMessageToTarget(const char *target, uint64_t module_id, uint8_t type, const char *payload, uint32_t len);
void clusterPropagatePublish(robj *channel, robj *message, int sharded);
unsigned int keyHashSlot(char *key, int keylen);
int patternHashSlot(char *pattern, int length);
void clusterUpdateMyselfFlags(void);
void clusterUpdateMyselfIp(void);
void slotToChannelAdd(sds channel);

View File

@ -1255,11 +1255,17 @@ void scanGenericCommand(client *c, robj *o, unsigned long long cursor) {
.pattern = use_pattern ? pat : NULL,
.sampled = 0,
};
/* A pattern may restrict all matching keys to one cluster slot. */
int onlyslot = -1;
if (o == NULL && use_pattern && server.cluster_enabled) {
onlyslot = patternHashSlot(pat, patlen);
}
do {
/* In cluster mode there is a separate dictionary for each slot.
* If cursor is empty, we should try exploring next non-empty slot. */
if (o == NULL) {
cursor = dbScan(c->db, DB_MAIN, cursor, scanCallback, NULL, &data);
cursor = dbScan(c->db, DB_MAIN, cursor, onlyslot, scanCallback, NULL, &data);
} else {
cursor = dictScan(ht, cursor, scanCallback, &data);
}
@ -1426,14 +1432,30 @@ dictEntry *dbFind(redisDb *db, void *key, dbKeyType keyType){
* it performs a dictScan over the appropriate `keyType` dictionary of `db`.
* 3. If the slot is entirely scanned i.e. the cursor has reached 0, the next non empty slot is discovered.
* The slot information is embedded into the cursor and returned.
*
* To restrict the scan to a single cluster slot, pass a valid slot as
* 'onlyslot', otherwise pass -1.
*/
unsigned long long dbScan(redisDb *db, dbKeyType keyType, unsigned long long v, dictScanFunction *fn, int (dictScanValidFunction)(dict *d), void *privdata) {
unsigned long long dbScan(redisDb *db, dbKeyType keyType, unsigned long long v,
int onlyslot, dictScanFunction *fn,
int (dictScanValidFunction)(dict *d), void *privdata) {
dict *d;
unsigned long long cursor = 0;
/* During main dictionary traversal in cluster mode, 48 lower bits in the cursor are used for positioning in the HT.
* Following 14 bits are used for the slot number, ranging from 0 to 2^14-1.
* Slot is always 0 at the start of iteration and can be incremented only in cluster mode. */
int slot = getAndClearSlotIdFromCursor(&v);
if (onlyslot >= 0) {
if (slot < onlyslot) {
/* Fast-forward to onlyslot. */
serverAssert(onlyslot < CLUSTER_SLOTS);
slot = onlyslot;
v = 0;
} else if (slot > onlyslot) {
/* The cursor is already past onlyslot. */
return 0;
}
}
if (keyType == DB_MAIN)
d = db->dict[slot];
else if (keyType == DB_EXPIRES)
@ -1449,6 +1471,8 @@ unsigned long long dbScan(redisDb *db, dbKeyType keyType, unsigned long long v,
}
/* scanning done for the current dictionary or if the scanning wasn't possible, move to the next slot. */
if (cursor == 0 || !is_dict_valid) {
if (onlyslot >= 0)
return 0;
slot = dbGetNextNonEmptySlot(db, slot, keyType);
}
if (slot == -1) {

View File

@ -275,7 +275,7 @@ void activeExpireCycle(int type) {
long checked_buckets = 0;
while (data.sampled < num && checked_buckets < max_buckets) {
db->expires_cursor = dbScan(db, DB_EXPIRES, db->expires_cursor, expireScanCallback, isExpiryDictValidForSamplingCb, &data);
db->expires_cursor = dbScan(db, DB_EXPIRES, db->expires_cursor, -1, expireScanCallback, isExpiryDictValidForSamplingCb, &data);
if (db->expires_cursor == 0) {
break;
}

View File

@ -10981,7 +10981,7 @@ int RM_Scan(RedisModuleCtx *ctx, RedisModuleScanCursor *cursor, RedisModuleScanC
}
int ret = 1;
ScanCBData data = { ctx, privdata, fn };
cursor->cursor = dbScan(ctx->client->db, DB_MAIN, cursor->cursor, moduleScanCallback, NULL, &data);
cursor->cursor = dbScan(ctx->client->db, DB_MAIN, cursor->cursor, -1, moduleScanCallback, NULL, &data);
if (cursor->cursor == 0) {
cursor->done = 1;
ret = 0;

View File

@ -3120,7 +3120,9 @@ int calculateKeySlot(sds key);
unsigned long dbBuckets(redisDb *db, dbKeyType keyType);
size_t dbMemUsage(redisDb *db, dbKeyType keyType);
dictEntry *dbFind(redisDb *db, void *key, dbKeyType keyType);
unsigned long long dbScan(redisDb *db, dbKeyType keyType, unsigned long long cursor, dictScanFunction *fn, int (dictScanValidFunction)(dict *d), void *privdata);
unsigned long long dbScan(redisDb *db, dbKeyType keyType, unsigned long long cursor,
int onlyslot, dictScanFunction *fn,
int (dictScanValidFunction)(dict *d), void *privdata);
int dbExpand(const redisDb *db, uint64_t db_size, dbKeyType keyType, int try_expand);
unsigned long long cumulativeKeyCountRead(redisDb *db, int idx, dbKeyType keyType);
int getFairRandomSlot(redisDb *db, dbKeyType keyType);

View File

@ -430,6 +430,30 @@ proc test_scan {type} {
}
}
}
test "{$type} SCAN MATCH pattern implies cluster slot" {
# Tests the code path for an optimization for patterns like "{foo}-*",
# which imply that all matching keys belong to one slot.
#
# Start from an empty database so that only the keys created below exist.
r flushdb
# Create 100 keys for each of three different hash tags. Only the "{foo}"
# group matches the pattern used in the scan below.
for {set j 0} {$j < 100} {incr j} {
r set "{foo}-$j" "foo"; # slot 12182
r set "{bar}-$j" "bar"; # slot 5061
r set "{boo}-$j" "boo"; # slot 13142
}
# Run a complete SCAN iteration: keep calling SCAN with the cursor returned
# by the previous call until the returned cursor is 0.
set cursor 0
set keys {}
while 1 {
set res [r scan $cursor match "{foo}-*"]
# The reply is a two-element list: the next cursor and a list of keys.
set cursor [lindex $res 0]
set k [lindex $res 1]
lappend keys {*}$k
if {$cursor == 0} break
}
# De-duplicate the collected keys before counting them; exactly the 100
# "{foo}-N" keys are expected.
set keys [lsort -unique $keys]
assert_equal 100 [llength $keys]
}
}
start_server {tags {"scan network standalone"}} {