Optimize dictFind with prefetching and branch prediction hints (#13646)

This pull request optimizes the `dictFind` function by adding software
prefetching and branch prediction hints to improve cache efficiency and
reduce memory latency.
It introduces two prefetch hints (read/write) that become no-ops when
the compiler does not support them.

Baseline profiling with Intel VTune indicated that dictFind was
significantly back-end bound: memory latency accounted for 59.6% of
clockticks, and there were frequent DRAM-bound stalls caused by cache
misses during hash table lookups.

![microarch](https://github.com/user-attachments/assets/9e3cf334-ae6b-4767-b568-713a4ac24e87)

---------

Co-authored-by: Yuan Wang <wangyuancode@163.com>
This commit is contained in:
Filipe Oliveira (Redis) 2024-12-04 09:16:14 +00:00 committed by GitHub
parent 2af69a931a
commit ddafac4c6c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 29 additions and 1 deletions

View File

@ -101,6 +101,25 @@
#endif
#endif
/* Detect whether the compiler provides __builtin_prefetch().
 *
 * LLVM/Clang has supported it since 2.9:
 *   https://releases.llvm.org/2.9/docs/ReleaseNotes.html
 * GCC has supported it since 3.1 (https://gcc.gnu.org/gcc-3.1/changes.html),
 * but 4.9 is used as the minimum version here because releases that old are
 * not worth targeting.
 *
 * HAS_BUILTIN_PREFETCH is always defined, as either 1 or 0. */
#if (defined(__clang__) && \
     (__clang_major__ > 2 || (__clang_major__ == 2 && __clang_minor__ >= 9))) || \
    (defined(__GNUC__) && \
     (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)))
#define HAS_BUILTIN_PREFETCH 1
#else
#define HAS_BUILTIN_PREFETCH 0
#endif

/* Prefetch hints for an address that is about to be read or written.
 * Without compiler support they only evaluate the argument and discard it,
 * so callers can use them unconditionally. */
#if !HAS_BUILTIN_PREFETCH
#define redis_prefetch_read(addr) ((void)(addr))  /* Fallback: no prefetch issued */
#define redis_prefetch_write(addr) ((void)(addr)) /* Fallback: no prefetch issued */
#else
#define redis_prefetch_read(addr) __builtin_prefetch(addr, 0, 3)  /* rw=0 (read), locality=3 (high) */
#define redis_prefetch_write(addr) __builtin_prefetch(addr, 1, 3) /* rw=1 (write), locality=3 (high) */
#endif
/* Define redis_fsync to fdatasync() in Linux and fsync() for all the rest */
#if defined(__linux__)
#define redis_fsync(fd) fdatasync(fd)

View File

@ -760,14 +760,23 @@ dictEntry *dictFind(dict *d, const void *key)
for (table = 0; table <= 1; table++) {
if (table == 0 && (long)idx < d->rehashidx) continue;
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
/* Prefetch the bucket at the calculated index */
redis_prefetch_read(&d->ht_table[table][idx]);
he = d->ht_table[table][idx];
while(he) {
void *he_key = dictGetKey(he);
/* Prefetch the next entry to improve cache efficiency */
redis_prefetch_read(dictGetNext(he));
if (key == he_key || cmpFunc(d, key, he_key))
return he;
he = dictGetNext(he);
}
if (!dictIsRehashing(d)) return NULL;
/* Use unlikely to optimize branch prediction for the common case */
if (unlikely(!dictIsRehashing(d))) return NULL;
}
return NULL;
}