mirror of https://mirror.osredm.com/root/redis.git
Use Hadamard-based projection.
Works better and being deterministic (only relative to the projection size) the replicas will have the same matrix automatically.
This commit is contained in:
parent
958ebee091
commit
8bf9b8abc1
52
vset.c
52
vset.c
|
@ -63,21 +63,53 @@ struct vsetNodeVal {
|
||||||
RedisModuleString *attrib;
|
RedisModuleString *attrib;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Create a random projection matrix for dimensionality reduction.
|
/* Count the number of set bits in an integer (population count/Hamming weight).
|
||||||
* Returns NULL on allocation failure. Matrix is scaled by 1/sqrt(input_dim). */
|
* This is a portable implementation that doesn't rely on compiler extensions. */
|
||||||
|
static inline uint32_t bit_count(uint32_t n) {
|
||||||
|
uint32_t count = 0;
|
||||||
|
while (n) {
|
||||||
|
count += n & 1;
|
||||||
|
n >>= 1;
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create a Hadamard-based projection matrix for dimensionality reduction.
|
||||||
|
* Uses {-1, +1} entries with a pattern based on bit operations.
|
||||||
|
* The pattern is matrix[i][j] = (i & j) % 2 == 0 ? 1 : -1
|
||||||
|
* Matrix is scaled by 1/sqrt(input_dim) for normalization.
|
||||||
|
* Returns NULL on allocation failure.
|
||||||
|
*
|
||||||
|
* Note that compared to other approaches (random gaussian weights), what
|
||||||
|
* we have here is deterministic, it means that our replicas will have
|
||||||
|
* the same set of weights. Also this approach seems to work much better
|
||||||
|
* in pratice, and the distances between elements are better guaranteed.
|
||||||
|
*
|
||||||
|
* Note that we still save the projection matrix in the RDB file, because
|
||||||
|
* in the future we may change the weights generation, and we want everything
|
||||||
|
* to be backward compatible. */
|
||||||
float *createProjectionMatrix(uint32_t input_dim, uint32_t output_dim) {
|
float *createProjectionMatrix(uint32_t input_dim, uint32_t output_dim) {
|
||||||
float *matrix = RedisModule_Alloc(sizeof(float) * input_dim * output_dim);
|
float *matrix = RedisModule_Alloc(sizeof(float) * input_dim * output_dim);
|
||||||
if (!matrix) return NULL;
|
|
||||||
|
|
||||||
|
/* Scale factor to normalize the projection. */
|
||||||
const float scale = 1.0f / sqrt(input_dim);
|
const float scale = 1.0f / sqrt(input_dim);
|
||||||
for (uint32_t i = 0; i < input_dim * output_dim; i++) {
|
|
||||||
/* Box-Muller transform for normal distribution */
|
/* Fill the matrix using Hadamard pattern. */
|
||||||
float u1 = (float)rand() / RAND_MAX;
|
for (uint32_t i = 0; i < output_dim; i++) {
|
||||||
float u2 = (float)rand() / RAND_MAX;
|
for (uint32_t j = 0; j < input_dim; j++) {
|
||||||
float r = sqrt(-2.0f * log(u1));
|
/* Calculate position in the flattened matrix. */
|
||||||
float theta = 2.0f * M_PI * u2;
|
uint32_t pos = i * input_dim + j;
|
||||||
matrix[i] = r * cos(theta) * scale;
|
|
||||||
|
/* Hadamard pattern: use bit operations to determine sign
|
||||||
|
* If the count of 1-bits in the bitwise AND of i and j is even,
|
||||||
|
* the value is 1, otherwise -1. */
|
||||||
|
int value = (bit_count(i & j) % 2 == 0) ? 1 : -1;
|
||||||
|
|
||||||
|
/* Store the scaled value. */
|
||||||
|
matrix[pos] = value * scale;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return matrix;
|
return matrix;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue