mirror of https://mirror.osredm.com/root/redis.git

commit 75bdf68487
Merge branch 'unstable' into incrementail_defrag_ebuckets
@@ -91,8 +91,8 @@ jobs:
      run: |
        apt-get update
        apt-get install -y gnupg2
-       echo "deb http://dk.archive.ubuntu.com/ubuntu/ xenial main" >> /etc/apt/sources.list
-       echo "deb http://dk.archive.ubuntu.com/ubuntu/ xenial universe" >> /etc/apt/sources.list
+       echo "deb http://archive.ubuntu.com/ubuntu/ xenial main" >> /etc/apt/sources.list
+       echo "deb http://archive.ubuntu.com/ubuntu/ xenial universe" >> /etc/apt/sources.list
        apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 40976EAF437D05B5
        apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 3B4FE6ACC0B21F32
        apt-get update
|
@ -346,10 +346,10 @@ jobs:
|
|||
run: sudo apt-get install tcl8.6 tclx
|
||||
- name: test
|
||||
if: true && !contains(github.event.inputs.skiptests, 'redis')
|
||||
run: ./runtest --config io-threads 4 --config io-threads-do-reads yes --accurate --verbose --tags network --dump-logs ${{github.event.inputs.test_args}}
|
||||
run: ./runtest --config io-threads 4 --accurate --verbose --tags network --dump-logs ${{github.event.inputs.test_args}}
|
||||
- name: cluster tests
|
||||
if: true && !contains(github.event.inputs.skiptests, 'cluster')
|
||||
run: ./runtest-cluster --config io-threads 4 --config io-threads-do-reads yes ${{github.event.inputs.cluster_test_args}}
|
||||
run: ./runtest-cluster --config io-threads 4 ${{github.event.inputs.cluster_test_args}}
|
||||
|
||||
test-ubuntu-reclaim-cache:
|
||||
runs-on: ubuntu-latest
|
||||
|
@@ -606,6 +606,50 @@ jobs:
      if: true && !contains(github.event.inputs.skiptests, 'unittest')
      run: ./src/redis-server test all

+ test-sanitizer-memory:
+   runs-on: ubuntu-latest
+   if: |
+     (github.event_name == 'workflow_dispatch' || (github.event_name != 'workflow_dispatch' && github.repository == 'redis/redis')) &&
+     !contains(github.event.inputs.skipjobs, 'sanitizer')
+   timeout-minutes: 14400
+   env:
+     CC: clang # MSan works only with clang
+   steps:
+   - name: prep
+     if: github.event_name == 'workflow_dispatch'
+     run: |
+       echo "GITHUB_REPOSITORY=${{github.event.inputs.use_repo}}" >> $GITHUB_ENV
+       echo "GITHUB_HEAD_REF=${{github.event.inputs.use_git_ref}}" >> $GITHUB_ENV
+       echo "skipjobs: ${{github.event.inputs.skipjobs}}"
+       echo "skiptests: ${{github.event.inputs.skiptests}}"
+       echo "test_args: ${{github.event.inputs.test_args}}"
+       echo "cluster_test_args: ${{github.event.inputs.cluster_test_args}}"
+   - uses: actions/checkout@v4
+     with:
+       repository: ${{ env.GITHUB_REPOSITORY }}
+       ref: ${{ env.GITHUB_HEAD_REF }}
+   - name: make
+     run: make SANITIZER=memory REDIS_CFLAGS='-DREDIS_TEST -Werror -DDEBUG_ASSERTIONS'
+   - name: testprep
+     run: |
+       sudo apt-get update
+       sudo apt-get install tcl8.6 tclx -y
+   - name: test
+     if: true && !contains(github.event.inputs.skiptests, 'redis')
+     run: ./runtest --accurate --verbose --dump-logs ${{github.event.inputs.test_args}}
+   - name: module api test
+     if: true && !contains(github.event.inputs.skiptests, 'modules')
+     run: SANITIZER=memory CFLAGS='-Werror' ./runtest-moduleapi --verbose --dump-logs ${{github.event.inputs.test_args}}
+   - name: sentinel tests
+     if: true && !contains(github.event.inputs.skiptests, 'sentinel')
+     run: ./runtest-sentinel ${{github.event.inputs.cluster_test_args}}
+   - name: cluster tests
+     if: true && !contains(github.event.inputs.skiptests, 'cluster')
+     run: ./runtest-cluster ${{github.event.inputs.cluster_test_args}}
+   - name: unittest
+     if: true && !contains(github.event.inputs.skiptests, 'unittest')
+     run: ./src/redis-server test all
+
  test-sanitizer-undefined:
    runs-on: ubuntu-latest
    if: |
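For anyone reproducing the new MSan job outside CI, the steps above reduce to roughly the following shell session (a sketch: it assumes clang is installed, and mirrors the `make` and `test` steps verbatim):

    sudo apt-get update && sudo apt-get install -y tcl8.6 tclx
    CC=clang make SANITIZER=memory REDIS_CFLAGS='-DREDIS_TEST -Werror -DDEBUG_ASSERTIONS'
    ./runtest --accurate --verbose --dump-logs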
@@ -632,7 +676,7 @@ jobs:
        repository: ${{ env.GITHUB_REPOSITORY }}
        ref: ${{ env.GITHUB_HEAD_REF }}
    - name: make
-     run: make SANITIZER=undefined REDIS_CFLAGS='-DREDIS_TEST -Werror' LUA_DEBUG=yes # we (ab)use this flow to also check Lua C API violations
+     run: make SANITIZER=undefined REDIS_CFLAGS='-DREDIS_TEST -Werror' SKIP_VEC_SETS=yes LUA_DEBUG=yes # we (ab)use this flow to also check Lua C API violations
    - name: testprep
      run: |
        sudo apt-get update
@@ -1076,8 +1120,8 @@ jobs:
      run: |
        apt-get update
        apt-get install -y gnupg2
-       echo "deb http://dk.archive.ubuntu.com/ubuntu/ xenial main" >> /etc/apt/sources.list
-       echo "deb http://dk.archive.ubuntu.com/ubuntu/ xenial universe" >> /etc/apt/sources.list
+       echo "deb http://archive.ubuntu.com/ubuntu/ xenial main" >> /etc/apt/sources.list
+       echo "deb http://archive.ubuntu.com/ubuntu/ xenial universe" >> /etc/apt/sources.list
        apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 40976EAF437D05B5
        apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 3B4FE6ACC0B21F32
        apt-get update
@@ -1125,8 +1169,8 @@ jobs:
      run: |
        apt-get update
        apt-get install -y gnupg2
-       echo "deb http://dk.archive.ubuntu.com/ubuntu/ xenial main" >> /etc/apt/sources.list
-       echo "deb http://dk.archive.ubuntu.com/ubuntu/ xenial universe" >> /etc/apt/sources.list
+       echo "deb http://archive.ubuntu.com/ubuntu/ xenial main" >> /etc/apt/sources.list
+       echo "deb http://archive.ubuntu.com/ubuntu/ xenial universe" >> /etc/apt/sources.list
        apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 40976EAF437D05B5
        apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 3B4FE6ACC0B21F32
        apt-get update
@@ -1180,8 +1224,8 @@ jobs:
      run: |
        apt-get update
        apt-get install -y gnupg2
-       echo "deb http://dk.archive.ubuntu.com/ubuntu/ xenial main" >> /etc/apt/sources.list
-       echo "deb http://dk.archive.ubuntu.com/ubuntu/ xenial universe" >> /etc/apt/sources.list
+       echo "deb http://archive.ubuntu.com/ubuntu/ xenial main" >> /etc/apt/sources.list
+       echo "deb http://archive.ubuntu.com/ubuntu/ xenial universe" >> /etc/apt/sources.list
        apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 40976EAF437D05B5
        apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 3B4FE6ACC0B21F32
        apt-get update
@@ -1,7 +1,7 @@
By contributing code to the Redis project in any form you agree to the Redis Software Grant and
Contributor License Agreement attached below. Only contributions made under the Redis Software Grant
and Contributor License Agreement may be accepted by Redis, and any contribution is subject to the
-terms of the Redis dual-license under RSALv2/SSPLv1 as described in the LICENSE.txt file included in
+terms of the Redis tri-license under RSALv2/SSPLv1/AGPLv3 as described in the LICENSE.txt file included in
the Redis source distribution.

# REDIS SOFTWARE GRANT AND CONTRIBUTOR LICENSE AGREEMENT
@@ -9,7 +9,7 @@ the Redis source distribution.
To specify the intellectual property license granted in any Contribution, Redis Ltd., ("**Redis**")
requires a Software Grant and Contributor License Agreement ("**Agreement**"). This Agreement is for
your protection as a contributor as well as the protection of Redis and its users; it does not
-change your rights to use your own Contribution for any other purpose.
+change your rights to use your own Contribution for any other purpose permitted by this Agreement.

By making any Contribution, You accept and agree to the following terms and conditions for the
Contribution. Except for the license granted in this Agreement to Redis and the recipients of the
@@ -115,4 +115,4 @@ view, and so forth. This helps.

4. For minor fixes - open a pull request on GitHub.

-Additional information on the RSALv2/SSPLv1 dual-license is also found in the LICENSE.txt file.
+Additional information on the RSALv2/SSPLv1/AGPLv3 tri-license is also found in the LICENSE.txt file.

LICENSE.txt (676 changed lines)
@@ -1,8 +1,11 @@
-Starting on March 20th, 2024, Redis follows a dual-licensing model with all Redis project code
-contributions under version 7.4 and subsequent releases governed by the Redis Software Grant and
-Contributor License Agreement. After this date, contributions are subject to the user's choice of
-the Redis Source Available License v2 (RSALv2) or the Server Side Public License v1 (SSPLv1), as
-follows:
+Starting with Redis 8, Redis Open Source is moving to a tri-licensing model with all new Redis code
+contributions governed by the updated Redis Software Grant and Contributor License Agreement.
+After this release, contributions are subject to your choice of: (a) the Redis Source Available License v2
+(RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the GNU Affero General Public License v3 (AGPLv3).
+Redis Open Source 7.2 and prior releases remain subject to the BSDv3 clause license as referenced
+in the REDISCONTRIBUTIONS.txt file.

+The licensing structure for Redis 8.0 and subsequent releases is as follows:
+

1. Redis Source Available License 2.0 (RSALv2) Agreement
@@ -731,3 +734,666 @@ exclusive jurisdiction for all purposes relating to this Agreement.
return for a fee.

END OF TERMS AND CONDITIONS


3. GNU AFFERO GENERAL PUBLIC LICENSE, Version 3, 19 Nov 2007
========================================================

Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
Everyone is permitted to copy and distribute verbatim copies
of this license document, but changing it is not allowed.

Preamble

The GNU Affero General Public License is a free, copyleft license for
software and other kinds of works, specifically designed to ensure
cooperation with the community in the case of network server software.

The licenses for most software and other practical works are designed
to take away your freedom to share and change the works. By contrast,
our General Public Licenses are intended to guarantee your freedom to
share and change all versions of a program--to make sure it remains free
software for all its users.

When we speak of free software, we are referring to freedom, not
price. Our General Public Licenses are designed to make sure that you
have the freedom to distribute copies of free software (and charge for
them if you wish), that you receive source code or can get it if you
want it, that you can change the software or use pieces of it in new
free programs, and that you know you can do these things.

Developers that use our General Public Licenses protect your rights
with two steps: (1) assert copyright on the software, and (2) offer
you this License which gives you legal permission to copy, distribute
and/or modify the software.

A secondary benefit of defending all users' freedom is that
improvements made in alternate versions of the program, if they
receive widespread use, become available for other developers to
incorporate. Many developers of free software are heartened and
encouraged by the resulting cooperation. However, in the case of
software used on network servers, this result may fail to come about.
The GNU General Public License permits making a modified version and
letting the public access it on a server without ever releasing its
source code to the public.

The GNU Affero General Public License is designed specifically to
ensure that, in such cases, the modified source code becomes available
to the community. It requires the operator of a network server to
provide the source code of the modified version running there to the
users of that server. Therefore, public use of a modified version, on
a publicly accessible server, gives the public access to the source
code of the modified version.

An older license, called the Affero General Public License and
published by Affero, was designed to accomplish similar goals. This is
a different license, not a version of the Affero GPL, but Affero has
released a new version of the Affero GPL which permits relicensing under
this license.

The precise terms and conditions for copying, distribution and
modification follow.

TERMS AND CONDITIONS

0. Definitions.

"This License" refers to version 3 of the GNU Affero General Public License.

"Copyright" also means copyright-like laws that apply to other kinds of
works, such as semiconductor masks.

"The Program" refers to any copyrightable work licensed under this
License. Each licensee is addressed as "you". "Licensees" and
"recipients" may be individuals or organizations.

To "modify" a work means to copy from or adapt all or part of the work
in a fashion requiring copyright permission, other than the making of an
exact copy. The resulting work is called a "modified version" of the
earlier work or a work "based on" the earlier work.

A "covered work" means either the unmodified Program or a work based
on the Program.

To "propagate" a work means to do anything with it that, without
permission, would make you directly or secondarily liable for
infringement under applicable copyright law, except executing it on a
computer or modifying a private copy. Propagation includes copying,
distribution (with or without modification), making available to the
public, and in some countries other activities as well.

To "convey" a work means any kind of propagation that enables other
parties to make or receive copies. Mere interaction with a user through
a computer network, with no transfer of a copy, is not conveying.

An interactive user interface displays "Appropriate Legal Notices"
to the extent that it includes a convenient and prominently visible
feature that (1) displays an appropriate copyright notice, and (2)
tells the user that there is no warranty for the work (except to the
extent that warranties are provided), that licensees may convey the
work under this License, and how to view a copy of this License. If
the interface presents a list of user commands or options, such as a
menu, a prominent item in the list meets this criterion.

1. Source Code.

The "source code" for a work means the preferred form of the work
for making modifications to it. "Object code" means any non-source
form of a work.

A "Standard Interface" means an interface that either is an official
standard defined by a recognized standards body, or, in the case of
interfaces specified for a particular programming language, one that
is widely used among developers working in that language.

The "System Libraries" of an executable work include anything, other
than the work as a whole, that (a) is included in the normal form of
packaging a Major Component, but which is not part of that Major
Component, and (b) serves only to enable use of the work with that
Major Component, or to implement a Standard Interface for which an
implementation is available to the public in source code form. A
"Major Component", in this context, means a major essential component
(kernel, window system, and so on) of the specific operating system
(if any) on which the executable work runs, or a compiler used to
produce the work, or an object code interpreter used to run it.

The "Corresponding Source" for a work in object code form means all
the source code needed to generate, install, and (for an executable
work) run the object code and to modify the work, including scripts to
control those activities. However, it does not include the work's
System Libraries, or general-purpose tools or generally available free
programs which are used unmodified in performing those activities but
which are not part of the work. For example, Corresponding Source
includes interface definition files associated with source files for
the work, and the source code for shared libraries and dynamically
linked subprograms that the work is specifically designed to require,
such as by intimate data communication or control flow between those
subprograms and other parts of the work.

The Corresponding Source need not include anything that users
can regenerate automatically from other parts of the Corresponding
Source.

The Corresponding Source for a work in source code form is that
same work.

2. Basic Permissions.

All rights granted under this License are granted for the term of
copyright on the Program, and are irrevocable provided the stated
conditions are met. This License explicitly affirms your unlimited
permission to run the unmodified Program. The output from running a
covered work is covered by this License only if the output, given its
content, constitutes a covered work. This License acknowledges your
rights of fair use or other equivalent, as provided by copyright law.

You may make, run and propagate covered works that you do not
convey, without conditions so long as your license otherwise remains
in force. You may convey covered works to others for the sole purpose
of having them make modifications exclusively for you, or provide you
with facilities for running those works, provided that you comply with
the terms of this License in conveying all material for which you do
not control copyright. Those thus making or running the covered works
for you must do so exclusively on your behalf, under your direction
and control, on terms that prohibit them from making any copies of
your copyrighted material outside their relationship with you.

Conveying under any other circumstances is permitted solely under
the conditions stated below. Sublicensing is not allowed; section 10
makes it unnecessary.

3. Protecting Users' Legal Rights From Anti-Circumvention Law.

No covered work shall be deemed part of an effective technological
measure under any applicable law fulfilling obligations under article
11 of the WIPO copyright treaty adopted on 20 December 1996, or
similar laws prohibiting or restricting circumvention of such
measures.

When you convey a covered work, you waive any legal power to forbid
circumvention of technological measures to the extent such circumvention
is effected by exercising rights under this License with respect to
the covered work, and you disclaim any intention to limit operation or
modification of the work as a means of enforcing, against the work's
users, your or third parties' legal rights to forbid circumvention of
technological measures.

4. Conveying Verbatim Copies.

You may convey verbatim copies of the Program's source code as you
receive it, in any medium, provided that you conspicuously and
appropriately publish on each copy an appropriate copyright notice;
keep intact all notices stating that this License and any
non-permissive terms added in accord with section 7 apply to the code;
keep intact all notices of the absence of any warranty; and give all
recipients a copy of this License along with the Program.

You may charge any price or no price for each copy that you convey,
and you may offer support or warranty protection for a fee.

5. Conveying Modified Source Versions.

You may convey a work based on the Program, or the modifications to
produce it from the Program, in the form of source code under the
terms of section 4, provided that you also meet all of these conditions:

    a) The work must carry prominent notices stating that you modified
    it, and giving a relevant date.

    b) The work must carry prominent notices stating that it is
    released under this License and any conditions added under section
    7. This requirement modifies the requirement in section 4 to
    "keep intact all notices".

    c) You must license the entire work, as a whole, under this
    License to anyone who comes into possession of a copy. This
    License will therefore apply, along with any applicable section 7
    additional terms, to the whole of the work, and all its parts,
    regardless of how they are packaged. This License gives no
    permission to license the work in any other way, but it does not
    invalidate such permission if you have separately received it.

    d) If the work has interactive user interfaces, each must display
    Appropriate Legal Notices; however, if the Program has interactive
    interfaces that do not display Appropriate Legal Notices, your
    work need not make them do so.

A compilation of a covered work with other separate and independent
works, which are not by their nature extensions of the covered work,
and which are not combined with it such as to form a larger program,
in or on a volume of a storage or distribution medium, is called an
"aggregate" if the compilation and its resulting copyright are not
used to limit the access or legal rights of the compilation's users
beyond what the individual works permit. Inclusion of a covered work
in an aggregate does not cause this License to apply to the other
parts of the aggregate.

6. Conveying Non-Source Forms.

You may convey a covered work in object code form under the terms
of sections 4 and 5, provided that you also convey the
machine-readable Corresponding Source under the terms of this License,
in one of these ways:

    a) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by the
    Corresponding Source fixed on a durable physical medium
    customarily used for software interchange.

    b) Convey the object code in, or embodied in, a physical product
    (including a physical distribution medium), accompanied by a
    written offer, valid for at least three years and valid for as
    long as you offer spare parts or customer support for that product
    model, to give anyone who possesses the object code either (1) a
    copy of the Corresponding Source for all the software in the
    product that is covered by this License, on a durable physical
    medium customarily used for software interchange, for a price no
    more than your reasonable cost of physically performing this
    conveying of source, or (2) access to copy the
    Corresponding Source from a network server at no charge.

    c) Convey individual copies of the object code with a copy of the
    written offer to provide the Corresponding Source. This
    alternative is allowed only occasionally and noncommercially, and
    only if you received the object code with such an offer, in accord
    with subsection 6b.

    d) Convey the object code by offering access from a designated
    place (gratis or for a charge), and offer equivalent access to the
    Corresponding Source in the same way through the same place at no
    further charge. You need not require recipients to copy the
    Corresponding Source along with the object code. If the place to
    copy the object code is a network server, the Corresponding Source
    may be on a different server (operated by you or a third party)
    that supports equivalent copying facilities, provided you maintain
    clear directions next to the object code saying where to find the
    Corresponding Source. Regardless of what server hosts the
    Corresponding Source, you remain obligated to ensure that it is
    available for as long as needed to satisfy these requirements.

    e) Convey the object code using peer-to-peer transmission, provided
    you inform other peers where the object code and Corresponding
    Source of the work are being offered to the general public at no
    charge under subsection 6d.

A separable portion of the object code, whose source code is excluded
from the Corresponding Source as a System Library, need not be
included in conveying the object code work.

A "User Product" is either (1) a "consumer product", which means any
tangible personal property which is normally used for personal, family,
or household purposes, or (2) anything designed or sold for incorporation
into a dwelling. In determining whether a product is a consumer product,
doubtful cases shall be resolved in favor of coverage. For a particular
product received by a particular user, "normally used" refers to a
typical or common use of that class of product, regardless of the status
of the particular user or of the way in which the particular user
actually uses, or expects or is expected to use, the product. A product
is a consumer product regardless of whether the product has substantial
commercial, industrial or non-consumer uses, unless such uses represent
the only significant mode of use of the product.

"Installation Information" for a User Product means any methods,
procedures, authorization keys, or other information required to install
and execute modified versions of a covered work in that User Product from
a modified version of its Corresponding Source. The information must
suffice to ensure that the continued functioning of the modified object
code is in no case prevented or interfered with solely because
modification has been made.

If you convey an object code work under this section in, or with, or
specifically for use in, a User Product, and the conveying occurs as
part of a transaction in which the right of possession and use of the
User Product is transferred to the recipient in perpetuity or for a
fixed term (regardless of how the transaction is characterized), the
Corresponding Source conveyed under this section must be accompanied
by the Installation Information. But this requirement does not apply
if neither you nor any third party retains the ability to install
modified object code on the User Product (for example, the work has
been installed in ROM).

The requirement to provide Installation Information does not include a
requirement to continue to provide support service, warranty, or updates
for a work that has been modified or installed by the recipient, or for
the User Product in which it has been modified or installed. Access to a
network may be denied when the modification itself materially and
adversely affects the operation of the network or violates the rules and
protocols for communication across the network.

Corresponding Source conveyed, and Installation Information provided,
in accord with this section must be in a format that is publicly
documented (and with an implementation available to the public in
source code form), and must require no special password or key for
unpacking, reading or copying.

7. Additional Terms.

"Additional permissions" are terms that supplement the terms of this
License by making exceptions from one or more of its conditions.
Additional permissions that are applicable to the entire Program shall
be treated as though they were included in this License, to the extent
that they are valid under applicable law. If additional permissions
apply only to part of the Program, that part may be used separately
under those permissions, but the entire Program remains governed by
this License without regard to the additional permissions.

When you convey a copy of a covered work, you may at your option
remove any additional permissions from that copy, or from any part of
it. (Additional permissions may be written to require their own
removal in certain cases when you modify the work.) You may place
additional permissions on material, added by you to a covered work,
for which you have or can give appropriate copyright permission.

Notwithstanding any other provision of this License, for material you
add to a covered work, you may (if authorized by the copyright holders of
that material) supplement the terms of this License with terms:

    a) Disclaiming warranty or limiting liability differently from the
    terms of sections 15 and 16 of this License; or

    b) Requiring preservation of specified reasonable legal notices or
    author attributions in that material or in the Appropriate Legal
    Notices displayed by works containing it; or

    c) Prohibiting misrepresentation of the origin of that material, or
    requiring that modified versions of such material be marked in
    reasonable ways as different from the original version; or

    d) Limiting the use for publicity purposes of names of licensors or
    authors of the material; or

    e) Declining to grant rights under trademark law for use of some
    trade names, trademarks, or service marks; or

    f) Requiring indemnification of licensors and authors of that
    material by anyone who conveys the material (or modified versions of
    it) with contractual assumptions of liability to the recipient, for
    any liability that these contractual assumptions directly impose on
    those licensors and authors.

All other non-permissive additional terms are considered "further
restrictions" within the meaning of section 10. If the Program as you
received it, or any part of it, contains a notice stating that it is
governed by this License along with a term that is a further
restriction, you may remove that term. If a license document contains
a further restriction but permits relicensing or conveying under this
License, you may add to a covered work material governed by the terms
of that license document, provided that the further restriction does
not survive such relicensing or conveying.

If you add terms to a covered work in accord with this section, you
must place, in the relevant source files, a statement of the
additional terms that apply to those files, or a notice indicating
where to find the applicable terms.

Additional terms, permissive or non-permissive, may be stated in the
form of a separately written license, or stated as exceptions;
the above requirements apply either way.

8. Termination.

You may not propagate or modify a covered work except as expressly
provided under this License. Any attempt otherwise to propagate or
modify it is void, and will automatically terminate your rights under
this License (including any patent licenses granted under the third
paragraph of section 11).

However, if you cease all violation of this License, then your
license from a particular copyright holder is reinstated (a)
provisionally, unless and until the copyright holder explicitly and
finally terminates your license, and (b) permanently, if the copyright
holder fails to notify you of the violation by some reasonable means
prior to 60 days after the cessation.

Moreover, your license from a particular copyright holder is
reinstated permanently if the copyright holder notifies you of the
violation by some reasonable means, this is the first time you have
received notice of violation of this License (for any work) from that
copyright holder, and you cure the violation prior to 30 days after
your receipt of the notice.

Termination of your rights under this section does not terminate the
licenses of parties who have received copies or rights from you under
this License. If your rights have been terminated and not permanently
reinstated, you do not qualify to receive new licenses for the same
material under section 10.

9. Acceptance Not Required for Having Copies.

You are not required to accept this License in order to receive or
run a copy of the Program. Ancillary propagation of a covered work
occurring solely as a consequence of using peer-to-peer transmission
to receive a copy likewise does not require acceptance. However,
nothing other than this License grants you permission to propagate or
modify any covered work. These actions infringe copyright if you do
not accept this License. Therefore, by modifying or propagating a
covered work, you indicate your acceptance of this License to do so.

10. Automatic Licensing of Downstream Recipients.

Each time you convey a covered work, the recipient automatically
receives a license from the original licensors, to run, modify and
propagate that work, subject to this License. You are not responsible
for enforcing compliance by third parties with this License.

An "entity transaction" is a transaction transferring control of an
organization, or substantially all assets of one, or subdividing an
organization, or merging organizations. If propagation of a covered
work results from an entity transaction, each party to that
transaction who receives a copy of the work also receives whatever
licenses to the work the party's predecessor in interest had or could
give under the previous paragraph, plus a right to possession of the
Corresponding Source of the work from the predecessor in interest, if
the predecessor has it or can get it with reasonable efforts.

You may not impose any further restrictions on the exercise of the
rights granted or affirmed under this License. For example, you may
not impose a license fee, royalty, or other charge for exercise of
rights granted under this License, and you may not initiate litigation
(including a cross-claim or counterclaim in a lawsuit) alleging that
any patent claim is infringed by making, using, selling, offering for
sale, or importing the Program or any portion of it.

11. Patents.

A "contributor" is a copyright holder who authorizes use under this
License of the Program or a work on which the Program is based. The
work thus licensed is called the contributor's "contributor version".

A contributor's "essential patent claims" are all patent claims
owned or controlled by the contributor, whether already acquired or
hereafter acquired, that would be infringed by some manner, permitted
by this License, of making, using, or selling its contributor version,
but do not include claims that would be infringed only as a
consequence of further modification of the contributor version. For
purposes of this definition, "control" includes the right to grant
patent sublicenses in a manner consistent with the requirements of
this License.

Each contributor grants you a non-exclusive, worldwide, royalty-free
patent license under the contributor's essential patent claims, to
make, use, sell, offer for sale, import and otherwise run, modify and
propagate the contents of its contributor version.

In the following three paragraphs, a "patent license" is any express
agreement or commitment, however denominated, not to enforce a patent
(such as an express permission to practice a patent or covenant not to
sue for patent infringement). To "grant" such a patent license to a
party means to make such an agreement or commitment not to enforce a
patent against the party.

If you convey a covered work, knowingly relying on a patent license,
and the Corresponding Source of the work is not available for anyone
to copy, free of charge and under the terms of this License, through a
publicly available network server or other readily accessible means,
then you must either (1) cause the Corresponding Source to be so
available, or (2) arrange to deprive yourself of the benefit of the
patent license for this particular work, or (3) arrange, in a manner
consistent with the requirements of this License, to extend the patent
license to downstream recipients. "Knowingly relying" means you have
actual knowledge that, but for the patent license, your conveying the
covered work in a country, or your recipient's use of the covered work
in a country, would infringe one or more identifiable patents in that
country that you have reason to believe are valid.

If, pursuant to or in connection with a single transaction or
arrangement, you convey, or propagate by procuring conveyance of, a
covered work, and grant a patent license to some of the parties
receiving the covered work authorizing them to use, propagate, modify
or convey a specific copy of the covered work, then the patent license
you grant is automatically extended to all recipients of the covered
work and works based on it.

A patent license is "discriminatory" if it does not include within
the scope of its coverage, prohibits the exercise of, or is
conditioned on the non-exercise of one or more of the rights that are
specifically granted under this License. You may not convey a covered
work if you are a party to an arrangement with a third party that is
in the business of distributing software, under which you make payment
to the third party based on the extent of your activity of conveying
the work, and under which the third party grants, to any of the
parties who would receive the covered work from you, a discriminatory
patent license (a) in connection with copies of the covered work
conveyed by you (or copies made from those copies), or (b) primarily
for and in connection with specific products or compilations that
contain the covered work, unless you entered into that arrangement,
or that patent license was granted, prior to 28 March 2007.

Nothing in this License shall be construed as excluding or limiting
any implied license or other defenses to infringement that may
otherwise be available to you under applicable patent law.

12. No Surrender of Others' Freedom.

If conditions are imposed on you (whether by court order, agreement or
otherwise) that contradict the conditions of this License, they do not
excuse you from the conditions of this License. If you cannot convey a
covered work so as to satisfy simultaneously your obligations under this
License and any other pertinent obligations, then as a consequence you may
not convey it at all. For example, if you agree to terms that obligate you
to collect a royalty for further conveying from those to whom you convey
the Program, the only way you could satisfy both those terms and this
License would be to refrain entirely from conveying the Program.

13. Remote Network Interaction; Use with the GNU General Public License.

Notwithstanding any other provision of this License, if you modify the
Program, your modified version must prominently offer all users
interacting with it remotely through a computer network (if your version
supports such interaction) an opportunity to receive the Corresponding
Source of your version by providing access to the Corresponding Source
from a network server at no charge, through some standard or customary
means of facilitating copying of software. This Corresponding Source
shall include the Corresponding Source for any work covered by version 3
of the GNU General Public License that is incorporated pursuant to the
following paragraph.

Notwithstanding any other provision of this License, you have
permission to link or combine any covered work with a work licensed
under version 3 of the GNU General Public License into a single
combined work, and to convey the resulting work. The terms of this
License will continue to apply to the part which is the covered work,
but the work with which it is combined will remain governed by version
3 of the GNU General Public License.

14. Revised Versions of this License.

The Free Software Foundation may publish revised and/or new versions of
the GNU Affero General Public License from time to time. Such new versions
will be similar in spirit to the present version, but may differ in detail to
address new problems or concerns.

Each version is given a distinguishing version number. If the
Program specifies that a certain numbered version of the GNU Affero General
Public License "or any later version" applies to it, you have the
option of following the terms and conditions either of that numbered
version or of any later version published by the Free Software
Foundation. If the Program does not specify a version number of the
GNU Affero General Public License, you may choose any version ever published
by the Free Software Foundation.

If the Program specifies that a proxy can decide which future
versions of the GNU Affero General Public License can be used, that proxy's
public statement of acceptance of a version permanently authorizes you
to choose that version for the Program.

Later license versions may give you additional or different
permissions. However, no additional obligations are imposed on any
author or copyright holder as a result of your choosing to follow a
later version.

15. Disclaimer of Warranty.

THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
ALL NECESSARY SERVICING, REPAIR OR CORRECTION.

16. Limitation of Liability.

IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
SUCH DAMAGES.

17. Interpretation of Sections 15 and 16.

If the disclaimer of warranty and limitation of liability provided
above cannot be given local legal effect according to their terms,
reviewing courts shall apply local law that most closely approximates
an absolute waiver of all civil liability in connection with the
Program, unless a warranty or assumption of liability accompanies a
copy of the Program in return for a fee.

END OF TERMS AND CONDITIONS

How to Apply These Terms to Your New Programs

If you develop a new program, and you want it to be of the greatest
possible use to the public, the best way to achieve this is to make it
free software which everyone can redistribute and change under these terms.

To do so, attach the following notices to the program. It is safest
to attach them to the start of each source file to most effectively
state the exclusion of warranty; and each file should have at least
the "copyright" line and a pointer to where the full notice is found.

    <one line to give the program's name and a brief idea of what it does.>
    Copyright (C) <year> <name of author>

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program. If not, see <https://www.gnu.org/licenses/>.

Also add information on how to contact you by electronic and paper mail.

If your software can interact with users remotely through a computer
network, you should also make sure that it provides a way for users to
get its source. For example, if your program is a web application, its
interface could display a "Source" link that leads users to an archive
of the code. There are many ways you could offer source, and different
solutions will be better for different programs; see section 13 for the
specific requirements.

You should also get your employer (if you work as a programmer) or school,
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.
@@ -3,9 +3,10 @@ All rights reserved.

Note: Continued Applicability of the BSD-3-Clause License

-Despite the shift to the dual-licensing model with Redis Community Edition version 7.4 (RSALv2 or SSPLv1), portions of
-Redis Community Edition remain available subject to the BSD-3-Clause License (BSD). See below for the full BSD
-license:
+Despite the shift to the dual-licensing model with version 7.4 (RSALv2 or SSPLv1) and
+the shift to a tri-license option with version 8.0 (RSALv2/SSPLv1/AGPLv3), portions of
+Redis Open Source remain available subject to the BSD-3-Clause License (BSD).
+See below for the full BSD license:

Redistribution and use in source and binary forms, with or without modification, are permitted
provided that the following conditions are met:
SECURITY.md (15 changed lines)
@@ -9,13 +9,14 @@ performance and security.
We generally backport security issues to a single previous major version,
unless this is not possible or feasible with a reasonable effort.

-| Version | Supported          |
-| ------- | ------------------ |
-| 7.4.x   | :white_check_mark: |
-| 7.2.x   | :white_check_mark: |
-| < 7.2.x | :x:                |
-| 6.2.x   | :white_check_mark: |
-| < 6.2.x | :x:                |
+| Version | Supported                                                     |
+|---------|---------------------------------------------------------------|
+| 8.0.x   | :white_check_mark:                                            |
+| 7.4.x   | :white_check_mark:                                            |
+| 7.2.x   | :white_check_mark:                                            |
+| < 7.2.x | :x:                                                           |
+| 6.2.x   | :white_check_mark: Support may be removed after end of 2025   |
+| < 6.2.x | :x:                                                           |

## Reporting a Vulnerability

@@ -12,6 +12,23 @@ BINCOLOR="\033[37;1m"
MAKECOLOR="\033[32;1m"
ENDCOLOR="\033[0m"

+DEPS_CFLAGS := $(CFLAGS)
+DEPS_LDFLAGS := $(LDFLAGS)
+CLANG := $(findstring clang,$(shell sh -c '$(CC) --version | head -1'))
+
+# MSan looks for errors related to uninitialized memory.
+# Make sure to build the dependencies with MSan as it needs all the code to be instrumented.
+# A library could be used to initialize memory, but if it's not built with -fsanitize=memory then
+# MSan doesn't know about it and will report false positive errors when that memory is then used.
+ifeq ($(SANITIZER),memory)
+ifeq (clang, $(CLANG))
+    DEPS_CFLAGS+=-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-sanitize-recover=all -fno-omit-frame-pointer
+    DEPS_LDFLAGS+=-fsanitize=memory
+else
+    $(error "MemorySanitizer needs to be compiled and linked with clang. Please use CC=clang")
+endif
+endif
+
default:
	@echo "Explicit target required"

@@ -53,31 +70,31 @@ endif

hiredis: .make-prerequisites
	@printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
-	cd hiredis && $(MAKE) static $(HIREDIS_MAKE_FLAGS)
+	cd hiredis && $(MAKE) static $(HIREDIS_MAKE_FLAGS) CFLAGS="$(DEPS_CFLAGS)" LDFLAGS="$(DEPS_LDFLAGS)"

.PHONY: hiredis

linenoise: .make-prerequisites
	@printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
-	cd linenoise && $(MAKE)
+	cd linenoise && $(MAKE) CFLAGS="$(DEPS_CFLAGS)" LDFLAGS="$(DEPS_LDFLAGS)"

.PHONY: linenoise

hdr_histogram: .make-prerequisites
	@printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
-	cd hdr_histogram && $(MAKE)
+	cd hdr_histogram && $(MAKE) CFLAGS="$(DEPS_CFLAGS)" LDFLAGS="$(DEPS_LDFLAGS)"

.PHONY: hdr_histogram

fpconv: .make-prerequisites
	@printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
-	cd fpconv && $(MAKE)
+	cd fpconv && $(MAKE) CFLAGS="$(DEPS_CFLAGS)" LDFLAGS="$(DEPS_LDFLAGS)"

.PHONY: fpconv

fast_float: .make-prerequisites
	@printf '%b %b\n' $(MAKECOLOR)MAKE$(ENDCOLOR) $(BINCOLOR)$@$(ENDCOLOR)
-	cd fast_float && $(MAKE) libfast_float
+	cd fast_float && $(MAKE) libfast_float CFLAGS="$(DEPS_CFLAGS)" LDFLAGS="$(DEPS_LDFLAGS)"

.PHONY: fast_float

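The net effect of the hunks above is that every bundled dependency is now built with the sanitizer flags whenever `SANITIZER=memory` is set. As a rough illustration (not the literal expansion, which also carries any user-supplied CFLAGS/LDFLAGS), the hiredis rule then behaves like:

    cd deps/hiredis && make static \
        CFLAGS="-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-sanitize-recover=all -fno-omit-frame-pointer" \
        LDFLAGS="-fsanitize=memory"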
@@ -86,8 +103,8 @@ ifeq ($(uname_S),SunOS)
    LUA_CFLAGS= -D__C99FEATURES__=1
endif

-LUA_CFLAGS+= -Wall -DLUA_ANSI -DENABLE_CJSON_GLOBAL -DREDIS_STATIC='' -DLUA_USE_MKSTEMP $(CFLAGS)
-LUA_LDFLAGS+= $(LDFLAGS)
+LUA_CFLAGS+= -Wall -DLUA_ANSI -DENABLE_CJSON_GLOBAL -DREDIS_STATIC='' -DLUA_USE_MKSTEMP $(DEPS_CFLAGS)
+LUA_LDFLAGS+= $(DEPS_LDFLAGS)
ifeq ($(LUA_DEBUG),yes)
    LUA_CFLAGS+= -O0 -g -DLUA_USE_APICHECK
else
@@ -25,6 +25,7 @@ all: $(TARGET_MODULE)

$(TARGET_MODULE): get_source
	$(MAKE) -C $(SRC_DIR)
+	cp ${TARGET_MODULE} ./

get_source: $(SRC_DIR)/.prepared

@@ -35,8 +36,9 @@ $(SRC_DIR)/.prepared:

clean:
	-$(MAKE) -C $(SRC_DIR) clean
+	-rm -f ./*.so

-distclean:
+distclean: clean
	-$(MAKE) -C $(SRC_DIR) distclean

pristine:
@@ -1,5 +1,5 @@
SRC_DIR = src
-MODULE_VERSION = v7.99.4
+MODULE_VERSION = v8.0.1
MODULE_REPO = https://github.com/redisbloom/redisbloom
TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/redisbloom.so

@@ -1,5 +1,5 @@
SRC_DIR = src
-MODULE_VERSION = v7.99.5
+MODULE_VERSION = v8.0.1
MODULE_REPO = https://github.com/redisearch/redisearch
TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/search-community/redisearch.so

@@ -1,5 +1,5 @@
SRC_DIR = src
-MODULE_VERSION = v7.99.4
+MODULE_VERSION = v8.0.1
MODULE_REPO = https://github.com/redisjson/redisjson
TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/rejson.so

@@ -1,5 +1,5 @@
SRC_DIR = src
-MODULE_VERSION = v7.99.4
+MODULE_VERSION = v8.0.1
MODULE_REPO = https://github.com/redistimeseries/redistimeseries
TARGET_MODULE = $(SRC_DIR)/bin/$(FULL_VARIANT)/redistimeseries.so

@@ -0,0 +1,11 @@
__pycache__
misc
*.so
*.xo
*.o
.DS_Store
w2v
word2vec.bin
TODO
*.txt
*.rdb
@@ -0,0 +1,87 @@
# Compiler settings
CC = cc

ifdef SANITIZER
ifeq ($(SANITIZER),address)
    SAN=-fsanitize=address
else
ifeq ($(SANITIZER),undefined)
    SAN=-fsanitize=undefined
else
ifeq ($(SANITIZER),thread)
    SAN=-fsanitize=thread
else
    $(error "unknown sanitizer=${SANITIZER}")
endif
endif
endif
endif

CFLAGS = -O2 -Wall -Wextra -g $(SAN) -std=c11
LDFLAGS = -lm $(SAN)

# Detect OS
uname_S := $(shell sh -c 'uname -s 2>/dev/null || echo not')
uname_M := $(shell sh -c 'uname -m 2>/dev/null || echo not')

# Shared library compile flags for linux / osx
ifeq ($(uname_S),Linux)
    SHOBJ_CFLAGS ?= -W -Wall -fno-common -g -ggdb -std=c11 -O2
    SHOBJ_LDFLAGS ?= -shared
    ifneq (,$(findstring armv,$(uname_M)))
        SHOBJ_LDFLAGS += -latomic
    endif
    ifneq (,$(findstring aarch64,$(uname_M)))
        SHOBJ_LDFLAGS += -latomic
    endif
else
    SHOBJ_CFLAGS ?= -W -Wall -dynamic -fno-common -g -ggdb -std=c11 -O3
    SHOBJ_LDFLAGS ?= -bundle -undefined dynamic_lookup
endif

# OS X 11.x doesn't have /usr/lib/libSystem.dylib and needs an explicit setting.
ifeq ($(uname_S),Darwin)
ifeq ("$(wildcard /usr/lib/libSystem.dylib)","")
    LIBS = -L /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/usr/lib -lsystem
endif
endif

.SUFFIXES: .c .so .xo .o

all: vset.so

.c.xo:
	$(CC) -I. $(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@

vset.xo: ../../src/redismodule.h expr.c

vset.so: vset.xo hnsw.xo
	$(CC) -o $@ $^ $(SHOBJ_LDFLAGS) $(LIBS) $(SAN) -lc

# Example sources / objects
SRCS = hnsw.c w2v.c
OBJS = $(SRCS:.c=.o)

TARGET = w2v
MODULE = vset.so

# Default target
all: $(TARGET) $(MODULE)

# Example linking rule
$(TARGET): $(OBJS)
	$(CC) $(OBJS) $(LDFLAGS) -o $(TARGET)

# Compilation rule for object files
%.o: %.c
	$(CC) $(CFLAGS) -c $< -o $@

expr-test: expr.c fastjson.c fastjson_test.c
	$(CC) $(CFLAGS) expr.c -o expr-test -DTEST_MAIN -lm

# Clean rule
clean:
	rm -f $(TARGET) $(OBJS) *.xo *.so

# Declare phony targets
.PHONY: all clean
@ -0,0 +1,645 @@
|
|||
**IMPORTANT:** *Please note that this is a merged module: it is now part of the Redis binary, so you don't need to build it and load it into Redis. Compiling Redis version 8 or greater will result in the Vector Sets commands being available. However, you can still compile this module as a shared library in order to load it into older versions of Redis.*
|
||||
|
||||
This module implements Vector Sets for Redis, a new Redis data type similar
|
||||
to Sorted Sets but having string elements associated with a vector instead of
|
||||
a score. The fundamental goal of Vector Sets is to make it possible to add items,
|
||||
and later get a subset of the added items that are the most similar to a
|
||||
specified vector (often a learned embedding), or the most similar to the vector
|
||||
of an element that is already part of the Vector Set.
|
||||
|
||||
Moreover, Vector Sets implement optional filtered search capabilities: it is possible to associate attributes with all or a subset of the elements in the set, and then, using the `FILTER` option of the `VSIM` command, ask for items that are similar to a given vector and that also pass a filter specified as a simple mathematical expression (like `".year > 1950"`). This means that **you can have vector similarity and scalar filters at the same time**.
|
||||
|
||||
## Installation
|
||||
|
||||
**WARNING:** If you are running **Redis 8.0 RC1 or greater** you don't need to install anything: just compile Redis, and the Vector Sets commands will be part of the default install. Otherwise, to test Vector Sets with older Redis versions, follow the instructions below.
|
||||
|
||||
Build with:
|
||||
|
||||
make
|
||||
|
||||
Then load the module with the following command line, or by inserting the needed directives in the `redis.conf` file.
|
||||
|
||||
./redis-server --loadmodule vset.so
|
||||
|
||||
To run tests, I suggest using this:
|
||||
|
||||
./redis-server --save "" --enable-debug-command yes
|
||||
|
||||
Then execute the tests with:
|
||||
|
||||
./test.py
|
||||
|
||||
## Reference of available commands
|
||||
|
||||
**VADD: add items into a vector set**
|
||||
|
||||
VADD key [REDUCE dim] FP32|VALUES vector element [CAS] [NOQUANT | Q8 | BIN]
|
||||
[EF build-exploration-factor] [SETATTR <attributes>] [M <numlinks>]
|
||||
|
||||
Add a new element into the vector set specified by the key.
|
||||
The vector can be provided as an FP32 blob of values, or as floating point
|
||||
numbers as strings, prefixed by the number of elements (3 in the example):
|
||||
|
||||
VADD mykey VALUES 3 0.1 1.2 0.5 my-element
|
||||
|
||||
Meaning of the options:
|
||||
|
||||
`REDUCE` implements random projection, in order to reduce the
|
||||
dimensionality of the vector. The projection matrix is saved and reloaded
|
||||
along with the vector set. **Please note that** the `REDUCE` option must be passed immediately before the vector, like in `REDUCE 50 VALUES ...`.
|
||||
|
||||
`CAS` performs the operation partially using threads, in a
|
||||
check-and-set style. The neighbor candidates collection, which is slow, is
|
||||
performed in the background, while the command is executed in the main thread.
|
||||
|
||||
`NOQUANT` forces the vector to be created (in the first VADD call to a given key) without integer 8 quantization, which is otherwise the default.
|
||||
|
||||
`BIN` forces the vector to use binary quantization instead of int8. This is much faster and uses less memory, but impacts the recall quality.
|
||||
|
||||
`Q8` forces the vector to use signed 8 bit quantization. This is the default, and the option exists only so that insertions can check that the vector set already uses the same format.
|
||||
|
||||
`EF` plays a role in the effort made to find good candidates when connecting the new node to the existing HNSW graph. The default is 200. Using a larger value may help achieve a better recall. To improve the recall it is also possible to increase `EF` during `VSIM` searches.
|
||||
|
||||
`SETATTR` associates attributes with the newly created entry, or updates the entry attributes (if the entry already exists). It is the same as calling the `VSETATTR` command separately, so please check the documentation of that command in the filtered search section of this documentation.
|
||||
|
||||
`M` defaults to 16 and is the famous HNSW `M` parameter. It is the maximum number of connections that each node of the graph has with other nodes: more connections mean more memory, but a better ability to explore the graph. Nodes at layer zero (every node exists at least at layer zero) have `M*2` connections, while the other layers only have `M` connections. This means that, for instance, an `M` of 64 will use at least 1024 bytes of memory for each node! That is, `64 links * 2 times * 8 bytes pointers`, and even more, since on average each node has something like 1.33 layers (but the other layers have just `M` connections, instead of `M*2`). If you don't have a recall quality problem, the default is fine, and uses a limited amount of memory.
|
||||
|
||||
**VSIM: return elements by vector similarity**
|
||||
|
||||
VSIM key [ELE|FP32|VALUES] <vector or element> [WITHSCORES] [COUNT num] [EF search-exploration-factor] [FILTER expression] [FILTER-EF max-filtering-effort] [TRUTH] [NOTHREAD]
|
||||
|
||||
The command returns similar vectors. For simplicity (and brevity), in the following example, instead of providing a vector using FP32 or VALUES (as in `VADD`), we ask for elements having a vector similar to that of a given element already in the vector set:
|
||||
|
||||
> VSIM word_embeddings ELE apple
|
||||
1) "apple"
|
||||
2) "apples"
|
||||
3) "pear"
|
||||
4) "fruit"
|
||||
5) "berry"
|
||||
6) "pears"
|
||||
7) "strawberry"
|
||||
8) "peach"
|
||||
9) "potato"
|
||||
10) "grape"
|
||||
|
||||
It is possible to specify a `COUNT` and also to get the similarity score (from 1 to 0, where 1 means identical vectors and 0 means opposite vectors) between the query and the returned items.
|
||||
|
||||
> VSIM word_embeddings ELE apple WITHSCORES COUNT 3
|
||||
1) "apple"
|
||||
2) "0.9998867657923256"
|
||||
3) "apples"
|
||||
4) "0.8598527610301971"
|
||||
5) "pear"
|
||||
6) "0.8226882219314575"
|
||||
|
||||
The `EF` argument is the exploration factor: the higher it is, the slower the command becomes, but the better the index is explored to find nodes that are near to our query. Sensible values are from 50 to 1000.
|
||||
|
||||
The `TRUTH` option forces the command to perform a linear scan of all the entries inside the set, without using the graph search inside the HNSW, so it returns the best matching elements (the perfect result set) that can be used in order to easily calculate the recall. Of course the linear scan is `O(N)`, so it is much slower than the `log(N)` (considering a small `COUNT`) provided by the HNSW index.
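For instance, a minimal sketch of estimating recall@k with redis-py, comparing the approximated HNSW search with the `TRUTH` linear scan (the key and element names are just placeholders):

```
# Sketch: recall@k = |approximate results ∩ exact results| / k
# (assumptions: redis-py client, placeholder key/element names).
import redis

r = redis.Redis(decode_responses=True)
k = 10
approx = r.execute_command("VSIM", "word_embeddings", "ELE", "apple", "COUNT", k)
exact = r.execute_command("VSIM", "word_embeddings", "ELE", "apple", "COUNT", k, "TRUTH")
print(f"recall@{k}:", len(set(approx) & set(exact)) / k)
```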
|
||||
|
||||
The `NOTHREAD` option forces the command to execute the search on the data structure in the main thread. Normally `VSIM` spawns a thread instead. This may be useful for benchmarking purposes, or when we work with extremely small vector sets and don't want to pay the cost of spawning a thread. It is possible that in the future this option will be automatically used by Redis when we detect small vector sets. Note that this option blocks the server for all the time needed to complete the command, so it is a source of potential latency issues: if you are in doubt, never use it.
|
||||
|
||||
For `FILTER` and `FILTER-EF` options, please check the filtered search section of this documentation.
|
||||
|
||||
**VDIM: return the dimension of the vectors inside the vector set**
|
||||
|
||||
VDIM keyname
|
||||
|
||||
Example:
|
||||
|
||||
> VDIM word_embeddings
|
||||
(integer) 300
|
||||
|
||||
Note that in the case of vectors that were populated using the `REDUCE`
|
||||
option, for random projection, the vector set will report the
|
||||
projected (reduced) dimension. Yet the user should perform all the
|
||||
queries using full-size vectors.
|
||||
|
||||
**VCARD: return the number of elements in a vector set**
|
||||
|
||||
VCARD key
|
||||
|
||||
Example:
|
||||
|
||||
> VCARD word_embeddings
|
||||
(integer) 3000000
|
||||
|
||||
|
||||
**VREM: remove elements from vector set**
|
||||
|
||||
VREM key element
|
||||
|
||||
Example:
|
||||
|
||||
> VADD vset VALUES 3 1 0 1 bar
|
||||
(integer) 1
|
||||
> VREM vset bar
|
||||
(integer) 1
|
||||
> VREM vset bar
|
||||
(integer) 0
|
||||
|
||||
VREM does not perform tombstone / logical deletion, but will actually reclaim
|
||||
the memory from the vector set, so it is safe to add and remove elements
|
||||
in a vector set in the context of long running applications that continuously
|
||||
update the same index.
|
||||
|
||||
**VEMB: return the approximated vector of an element**
|
||||
|
||||
VEMB key element
|
||||
|
||||
Example:
|
||||
|
||||
> VEMB word_embeddings SQL
|
||||
1) "0.18208661675453186"
|
||||
2) "0.08535309880971909"
|
||||
3) "0.1365649551153183"
|
||||
4) "-0.16501599550247192"
|
||||
5) "0.14225517213344574"
|
||||
... 295 more elements ...
|
||||
|
||||
Because vector sets perform insertion time normalization and optional
|
||||
quantization, the returned vector could be approximated. `VEMB` will take
|
||||
care of de-quantizing and de-normalizing the vector before returning it.
|
||||
|
||||
It is possible to ask VEMB to return raw data, that is, the internal representation used for the vector: fp32, int8, or a bitmap for binary quantization. This behavior is triggered by the `RAW` option of VEMB:
|
||||
|
||||
VEMB word_embedding apple RAW
|
||||
|
||||
In this case the return value of the command is an array of three or more elements:
|
||||
1. The name of the quantization used, that is one of: "fp32", "bin", "q8".
|
||||
2. A string blob containing the raw data: 4-byte fp32 floats for fp32, a bitmap for binary quantization, or an int8 byte array for q8 quantization.
|
||||
3. A float representing the L2 norm of the vector before normalization. You need to multiply by this value if you want to de-normalize the vector for any reason.
|
||||
|
||||
For q8 quantization, an additional element is also returned: the quantization
|
||||
range, so the integers from -127 to 127 represent (normalized) components
|
||||
in the range `-range`, `+range`.
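For example, a minimal client-side de-quantization sketch with redis-py, following the reply layout described above (treating the blob as a plain array of signed int8 values, and the l2/range replies as bulk strings, are assumptions to verify against your build):

```
# Sketch: rebuild approximate float components from a q8 VEMB RAW reply
# (assumptions: redis-py with binary replies kept as bytes, and a blob
# laid out as a plain array of signed int8 values).
import struct
import redis

r = redis.Redis()  # decode_responses=False keeps the blob binary
quant, blob, l2, qrange = r.execute_command("VEMB", "word_embeddings", "apple", "RAW")
assert quant == b"q8"
l2, qrange = float(l2.decode()), float(qrange.decode())
ints = struct.unpack(f"{len(blob)}b", blob)          # signed int8 components
normalized = [i / 127.0 * qrange for i in ints]      # -127..127 -> -range..+range
original = [c * l2 for c in normalized]              # undo the normalization
```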
|
||||
|
||||
**VISMEMBER: test if a given element already exists**
|
||||
|
||||
This command will return 1 (or true) if the specified element is already in the vector set, otherwise 0 (or false) is returned.
|
||||
|
||||
VISMEMBER key element
|
||||
|
||||
As with other Redis existence-check commands, if the key does not exist it is treated as empty, thus the element is reported as non-existing.
|
||||
|
||||
**VLINKS: introspection command that shows neighbors for a node**
|
||||
|
||||
VLINKS key element [WITHSCORES]
|
||||
|
||||
The command reports the neighbors for each level.
|
||||
|
||||
**VINFO: introspection command that shows info about a vector set**
|
||||
|
||||
VINFO key
|
||||
|
||||
Example:
|
||||
|
||||
> VINFO word_embeddings
|
||||
1) quant-type
|
||||
2) int8
|
||||
3) vector-dim
|
||||
4) (integer) 300
|
||||
5) size
|
||||
6) (integer) 3000000
|
||||
7) max-level
|
||||
8) (integer) 12
|
||||
9) vset-uid
|
||||
10) (integer) 1
|
||||
11) hnsw-max-node-uid
|
||||
12) (integer) 3000000
|
||||
|
||||
**VSETATTR: associate or remove the JSON attributes of elements**
|
||||
|
||||
VSETATTR key element "{... json ...}"
|
||||
|
||||
Each element of a vector set can be optionally associated with a JSON string
|
||||
in order to use the `FILTER` option of `VSIM` to filter elements by scalars
|
||||
(see the filtered search section for more information). This command can set,
|
||||
update (if already set) or delete (if you set it to an empty string) the
|
||||
associated JSON attributes of an element.
|
||||
|
||||
The command returns 0 if the element or the key does not exist, without
|
||||
raising an error, otherwise 1 is returned, and the element attributes
|
||||
are set or updated.
|
||||
|
||||
**VGETATTR: retrieve the JSON attributes of elements**
|
||||
|
||||
VGETATTR key element
|
||||
|
||||
The command returns the JSON attribute associated with an element, or
|
||||
null if the element has no associated attribute, or the element does not exist, or the key does not exist.
|
||||
|
||||
**VRANDMEMBER: return random members from a vector set**
|
||||
|
||||
VRANDMEMBER key [count]
|
||||
|
||||
Return one or more random elements from a vector set.
|
||||
|
||||
The semantics of this command are similar to Redis's native SRANDMEMBER command:
|
||||
|
||||
- When called without count, returns a single random element from the set, as a single string (no array reply).
|
||||
- When called with a positive count, returns up to count distinct random elements (no duplicates).
|
||||
- When called with a negative count, returns count random elements, potentially with duplicates.
|
||||
- If the count value is larger than the set size (and positive), the entire set is returned.
|
||||
|
||||
If the key doesn't exist, returns a Null reply if count is not given, or an empty array if a count is provided.
|
||||
|
||||
Examples:
|
||||
|
||||
> VADD vset VALUES 3 1 0 0 elem1
|
||||
(integer) 1
|
||||
> VADD vset VALUES 3 0 1 0 elem2
|
||||
(integer) 1
|
||||
> VADD vset VALUES 3 0 0 1 elem3
|
||||
(integer) 1
|
||||
|
||||
# Return a single random element
|
||||
> VRANDMEMBER vset
|
||||
"elem2"
|
||||
|
||||
# Return 2 distinct random elements
|
||||
> VRANDMEMBER vset 2
|
||||
1) "elem1"
|
||||
2) "elem3"
|
||||
|
||||
# Return 3 random elements with possible duplicates
|
||||
> VRANDMEMBER vset -3
|
||||
1) "elem2"
|
||||
2) "elem2"
|
||||
3) "elem1"
|
||||
|
||||
# Return more elements than in the set (returns all elements)
|
||||
> VRANDMEMBER vset 10
|
||||
1) "elem1"
|
||||
2) "elem2"
|
||||
3) "elem3"
|
||||
|
||||
# When key doesn't exist
|
||||
> VRANDMEMBER nonexistent
|
||||
(nil)
|
||||
> VRANDMEMBER nonexistent 3
|
||||
(empty array)
|
||||
|
||||
This command is particularly useful for:
|
||||
|
||||
1. Selecting random samples from a vector set for testing or training.
|
||||
2. Performance testing by retrieving random elements for subsequent similarity searches.
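For instance, a small sketch of the second use case with redis-py (the key name is a placeholder): sample a random seed element, then run a similarity search from it:

```
# Sketch: use a random member as the seed of a similarity search
# (assumptions: redis-py client, placeholder key name).
import redis

r = redis.Redis(decode_responses=True)
seed = r.execute_command("VRANDMEMBER", "word_embeddings")  # single random element
similar = r.execute_command("VSIM", "word_embeddings", "ELE", seed, "COUNT", 5)
print(seed, "->", similar)
```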
|
||||
|
||||
When the user asks for unique elements (positive count) the implementation optimizes for two scenarios:
|
||||
- For small sample sizes (less than 20% of the set size), it uses a dictionary to avoid duplicates, and performs a real random walk inside the graph.
|
||||
- For large sample sizes (more than 20% of the set size), it starts from a random node and sequentially traverses the internal list, providing faster performance but less truly "random" elements.
|
||||
|
||||
The command has `O(N)` worst-case time complexity when requesting many unique elements (it uses linear scanning), or `O(M*log(N))` complexity when the user asks for `M` random elements in a vector set of `N` elements, with `M` much smaller than `N`.
|
||||
|
||||
# Filtered search
|
||||
|
||||
Each element of the vector set can be associated with a set of attributes specified as a JSON blob:
|
||||
|
||||
> VADD vset VALUES 3 1 1 1 a SETATTR '{"year": 1950}'
|
||||
(integer) 1
|
||||
> VADD vset VALUES 3 -1 -1 -1 b SETATTR '{"year": 1951}'
|
||||
(integer) 1
|
||||
|
||||
Specifying an attribute with the `SETATTR` option of `VADD` is exactly equivalent to adding an element and then setting (or updating, if already set) the attributes JSON string. The symmetrical `VGETATTR` command returns the attribute associated with a given element.
|
||||
|
||||
> VADD vset VALUES 3 0 1 0 c
|
||||
(integer) 1
|
||||
> VSETATTR vset c '{"year": 1952}'
|
||||
(integer) 1
|
||||
> VGETATTR vset c
|
||||
"{\"year\": 1952}"
|
||||
|
||||
At this point, I may use the FILTER option of VSIM to ask only for the subset of elements that satisfy my expression:
|
||||
|
||||
> VSIM vset VALUES 3 0 0 0 FILTER '.year > 1950'
|
||||
1) "c"
|
||||
2) "b"
|
||||
|
||||
The items are returned again in order of similarity (most similar first), but only the items with the year field matching the expression are returned.
|
||||
|
||||
The expressions are similar to what you would write inside the `if` statement of JavaScript or other familiar programming languages: you can use `and`, `or`, the obvious math operators like `+`, `-`, `/`, `>=`, `<`, ... and so forth (see the expressions section for more info). The selectors of the JSON object attributes start with a dot followed by the name of the key inside the JSON objects.
|
||||
|
||||
Elements with invalid JSON, or without the specified field, **are considered as not matching** the expression, but will not generate any error at runtime.
|
||||
|
||||
## FILTER expressions capabilities
|
||||
|
||||
FILTER expressions allow you to perform complex filtering on vector similarity results using a JavaScript-like syntax. The expression is evaluated against each element's JSON attributes, with only elements that satisfy the expression being included in the results.
|
||||
|
||||
### Expression Syntax
|
||||
|
||||
Expressions support the following operators and capabilities:
|
||||
|
||||
1. **Arithmetic operators**: `+`, `-`, `*`, `/`, `%` (modulo), `**` (exponentiation)
|
||||
2. **Comparison operators**: `>`, `>=`, `<`, `<=`, `==`, `!=`
|
||||
3. **Logical operators**: `and`/`&&`, `or`/`||`, `!`/`not`
|
||||
4. **Containment operator**: `in`
|
||||
5. **Parentheses** for grouping: `(...)`
|
||||
|
||||
### Selector Notation
|
||||
|
||||
Attributes are accessed using dot notation:
|
||||
|
||||
- `.year` references the "year" attribute
|
||||
- `.movie.year` would **NOT** reference the "year" field inside a "movie" object: only keys at the first level of the JSON object are accessible.
|
||||
|
||||
### JSON and expression data types
|
||||
|
||||
Expressions can work with:
|
||||
|
||||
- Numbers (double precision floats)
|
||||
- Strings (enclosed in single or double quotes)
|
||||
- Booleans (no native type: they are represented as 1 for true, 0 for false)
|
||||
- Arrays (for use with the `in` operator: `value in [1, 2, 3]`)
|
||||
|
||||
JSON attributes are converted in this way:
|
||||
|
||||
- Numbers will be converted to numbers.
|
||||
- Strings to strings.
|
||||
- Booleans to 0 or 1 number.
|
||||
- Arrays to tuples (for "in" operator), but only if composed of just numbers and strings.
|
||||
|
||||
Any other type is ignored, and accessing it will make the expression evaluate to false.
|
||||
|
||||
### Examples
|
||||
|
||||
```
|
||||
# Find items from the 1980s
|
||||
VSIM movies VALUES 3 0.5 0.8 0.2 FILTER '.year >= 1980 and .year < 1990'
|
||||
|
||||
# Find action movies with high ratings
|
||||
VSIM movies VALUES 3 0.5 0.8 0.2 FILTER '.genre == "action" and .rating > 8.0'
|
||||
|
||||
# Find movies directed by either Spielberg or Nolan
|
||||
VSIM movies VALUES 3 0.5 0.8 0.2 FILTER '.director in ["Spielberg", "Nolan"]'
|
||||
|
||||
# Complex condition with numerical operations
|
||||
VSIM movies VALUES 3 0.5 0.8 0.2 FILTER '(.year - 2000) ** 2 < 100 and .rating / 2 > 4'
|
||||
```
|
||||
|
||||
### Error Handling
|
||||
|
||||
Elements with any of the following conditions are considered not matching:
|
||||
- Missing the queried JSON attribute
|
||||
- Having invalid JSON in their attributes
|
||||
- Having a JSON value that cannot be converted to the expected type
|
||||
|
||||
This behavior allows you to safely filter on optional attributes without generating errors.
|
||||
|
||||
### FILTER effort
|
||||
|
||||
The `FILTER-EF` option controls the maximum effort spent when filtering vector search results.
|
||||
|
||||
When performing vector similarity search with filtering, Vector Sets perform the standard similarity search while applying the filter expression to each node. Since many results might be filtered out, Vector Sets may need to examine many more candidates than the requested `COUNT` to ensure sufficient matching results are returned. In fact, if the elements matching the filter are very rare, or there are fewer matching elements than the specified count, this could trigger a full scan of the HNSW graph.
|
||||
|
||||
For this reason, by default, the maximum effort is limited to a reasonable number of explored nodes.
|
||||
|
||||
### Modifying the FILTER effort
|
||||
|
||||
1. By default, Vector Sets will explore up to `COUNT * 100` candidates to find matching results.
|
||||
2. You can control this exploration with the `FILTER-EF` parameter.
|
||||
3. A higher `FILTER-EF` value increases the chances of finding all relevant matches at the cost of increased processing time.
|
||||
4. A `FILTER-EF` of zero will explore as many nodes as needed in order to actually return the number of elements specified by `COUNT`.
|
||||
5. Even when a high `FILTER-EF` value is specified, **the implementation will do a lot less work** if the elements passing the filter are very common, because of the early stop conditions of the HNSW implementation (once the specified number of elements is reached and the quality checks on the remaining candidates trigger an early stop).
|
||||
|
||||
```
|
||||
VSIM key [ELE|FP32|VALUES] <vector or element> COUNT 10 FILTER '.year > 2000' FILTER-EF 500
|
||||
```
|
||||
|
||||
In this example, Vector Sets will examine up to 500 potential nodes. Of course, if the count is reached before exploring 500 nodes, and the quality checks show that no more progress on similarity can be made, the search ends sooner.
|
||||
|
||||
### Performance Considerations
|
||||
|
||||
- If you have highly selective filters (few items match), use a higher `FILTER-EF`, or just design your application to handle a result set that is smaller than the requested count. Note that the additional elements may anyway be too distant from the query vector to be useful.
|
||||
- For less selective filters, the default should be sufficient.
|
||||
- Very selective filters with low `FILTER-EF` values may return fewer items than requested.
|
||||
- Extremely high values may impact performance without significantly improving results.
|
||||
|
||||
The optimal `FILTER-EF` value depends on:
|
||||
1. The selectivity of your filter.
|
||||
2. The distribution of your data.
|
||||
3. The required recall quality.
|
||||
|
||||
A good practice is to start with the default and increase if needed when you observe fewer results than expected.
|
||||
|
||||
### Testing a large-ish data set
|
||||
|
||||
To really see how things work at scale, you can [download](https://antirez.com/word2vec_with_attribs.rdb) the following dataset:
|
||||
|
||||
wget https://antirez.com/word2vec_with_attribs.rdb
|
||||
|
||||
It contains the 3 million words in Word2Vec, each having as attribute a JSON object with just the length of the word. Because of the length distribution of words in large bodies of text, where longer words become less and less common, this is ideal to check how filtering behaves as the filter matches fewer and fewer elements in the vector set.
|
||||
|
||||
For instance:
|
||||
|
||||
> VSIM word_embeddings_bin ele "pasta" FILTER ".len == 6"
|
||||
1) "pastas"
|
||||
2) "rotini"
|
||||
3) "gnocci"
|
||||
4) "panino"
|
||||
5) "salads"
|
||||
6) "breads"
|
||||
7) "salame"
|
||||
8) "sauces"
|
||||
9) "cheese"
|
||||
10) "fritti"
|
||||
|
||||
This will easily retrieve the desired number of items (`COUNT` is 10 by default) since there are many items of length 6. However:
|
||||
|
||||
> VSIM word_embeddings_bin ele "pasta" FILTER ".len == 33"
|
||||
1) "skinless_boneless_chicken_breasts"
|
||||
2) "boneless_skinless_chicken_breasts"
|
||||
3) "Boneless_skinless_chicken_breasts"
|
||||
|
||||
This time, even if we asked for 10 items, we only get 3, since the default filter effort will be `10*100 = 1000`. We can tune this by giving the effort explicitly, with the risk of making our query slower, of course:
|
||||
|
||||
> VSIM word_embeddings_bin ele "pasta" FILTER ".len == 33" FILTER-EF 10000
|
||||
1) "skinless_boneless_chicken_breasts"
|
||||
2) "boneless_skinless_chicken_breasts"
|
||||
3) "Boneless_skinless_chicken_breasts"
|
||||
4) "mozzarella_feta_provolone_cheddar"
|
||||
5) "Greatfood.com_R_www.greatfood.com"
|
||||
6) "Pepperidge_Farm_Goldfish_crackers"
|
||||
7) "Prosecuted_Mobsters_Rebuilt_Dying"
|
||||
8) "Crispy_Snacker_Sandwiches_Popcorn"
|
||||
9) "risultati_delle_partite_disputate"
|
||||
10) "Peppermint_Mocha_Twist_Gingersnap"
|
||||
|
||||
This time we get all ten items, even if the last one is quite far from our query vector. We encourage you to experiment with this test dataset in order to better understand the dynamics of the implementation and the natural tradeoffs of filtered search.
|
||||
|
||||
**Keep in mind** that by default, Redis Vector Sets will try to avoid a likely useless huge scan of the HNSW graph, and would rather return few or no elements at all, since this is almost always what the user actually wants in the context of retrieving *similar* items to the query.
|
||||
|
||||
# Single Instance Scalability and Latency
|
||||
|
||||
Vector Sets implement a threading model that allows Redis to handle many concurrent requests: by default `VSIM` is always threaded, and `VADD` is not (but can be partially threaded using the `CAS` option). This section explains how the threading and locking mechanisms work, and what to expect in terms of performance.
|
||||
|
||||
## Threading Model
|
||||
|
||||
- The `VSIM` command runs in a separate thread by default, allowing Redis to continue serving other commands.
|
||||
- A maximum of 32 threads can run concurrently (defined by `HNSW_MAX_THREADS`).
|
||||
- When this limit is reached, additional `VSIM` requests are queued - Redis remains responsive, no latency event is generated.
|
||||
- The `VADD` command with the `CAS` option also leverages threading for the computation-heavy candidate search phase, but the insertion itself is performed in the main thread. `VADD` always runs in sub-millisecond time, so it is not a source of latency, but many hundreds of writes per second can be challenging to handle with a single instance. Please look at the next section about scalability to multiple instances.
|
||||
- Commands run within Lua scripts, MULTI/EXEC blocks, or from replication are executed in the main thread to ensure consistency.
|
||||
|
||||
```
|
||||
> VSIM vset VALUES 3 1 1 1 FILTER '.year > 2000' # This runs in a thread.
|
||||
> VADD vset VALUES 3 1 1 1 element CAS # Candidate search runs in a thread.
|
||||
```
|
||||
|
||||
## Locking Mechanism
|
||||
|
||||
Vector Sets use a read/write locking mechanism to coordinate access:
|
||||
|
||||
- Reads (`VSIM`, `VEMB`, etc.) acquire a read lock, allowing multiple concurrent reads.
|
||||
- Writes (`VADD`, `VREM`, etc.) acquire a write lock, temporarily blocking all reads.
|
||||
- When a write lock is requested while reads are in progress, the write operation waits for all reads to complete.
|
||||
- Once a write lock is granted, all reads are blocked until the write completes.
|
||||
- Each thread has a dedicated slot for tracking visited nodes during graph traversal, avoiding contention. This improves performance but limits the maximum number of concurrent threads, since each node has a memory cost proportional to the number of slots.
|
||||
|
||||
## DEL latency
|
||||
|
||||
Deleting a very large vector set (millions of elements) can cause latency spikes, as deletion rebuilds connections between nodes. This may change in the future.
|
||||
The deletion latency is most noticeable when using `DEL` on a key containing a large vector set or when the key expires.
|
||||
|
||||
## Performance Characteristics
|
||||
|
||||
- Search operations (`VSIM`) scale almost linearly with the number of CPU cores available, up to the thread limit. You can expect a Vector Set composed of millions of items, with vectors of 300 components and the default int8 quantization, to deliver around 50k VSIM operations per second on a single host.
|
||||
- Insertion operations (`VADD`) are more computationally expensive than searches, and can't be threaded: expect much lower throughput, in the range of a few thousand inserts per second.
|
||||
- Binary quantization offers significantly faster search performance at the cost of some recall quality, while int8 quantization, the default, seems to have a very small impact on recall quality, while significantly improving performance and space efficiency.
|
||||
- The `EF` parameter has a major impact on both search quality and performance - higher values mean better recall but slower searches.
|
||||
- Graph traversal time scales logarithmically with the number of elements, making Vector Sets efficient even with millions of vectors.
|
||||
|
||||
## Loading / Saving performance
|
||||
|
||||
Vector Sets are able to serialize on disk the graph structure as it is in memory, so loading the data back does not require rebuilding the HNSW graph. This means that Redis can load millions of items per minute. For instance, 3 million items with 300-component vectors can be loaded back into memory in around 15 seconds.
|
||||
|
||||
# Scaling vector sets to multiple instances
|
||||
|
||||
The fundamental way vector sets can be scaled to very large data sets
|
||||
and to many Redis instances is that a given very large set of vectors
|
||||
can be partitioned into N different Redis keys, which can also live on
|
||||
different Redis instances.
|
||||
|
||||
For instance, I could add my elements into `key0`, `key1`, `key2`, by hashing
|
||||
the item in some way, like doing `crc32(item)%3`, effectively splitting
|
||||
the dataset into three different parts. However, when I want the vectors
|
||||
of my dataset nearest to a given query vector, I can simply run the
|
||||
`VSIM` command against all three keys, merging the results by
|
||||
score (so the commands must be called using the `WITHSCORES` option) on
|
||||
the client side: once the union of the results is ordered by the
|
||||
similarity score, the query is equivalent to having a single key `key1+2+3`
|
||||
containing all the items.
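As a concrete sketch of this pattern (assuming redis-py, three hypothetical instances on different ports, hypothetical key names `key0`..`key2`, and flat element/score replies for `WITHSCORES`), the client-side fan-out and merge could look like this:

```
# Sketch of the partitioning pattern described above (assumptions:
# redis-py client, three instances on ports 6379-6381, hypothetical
# key names key0..key2, WITHSCORES replies as flat [elem, score, ...]).
import zlib
import redis

nodes = [redis.Redis(port=p, decode_responses=True) for p in (6379, 6380, 6381)]

def vadd(item, vector):
    idx = zlib.crc32(item.encode()) % len(nodes)     # crc32(item) % 3
    nodes[idx].execute_command("VADD", f"key{idx}", "VALUES",
                               len(vector), *vector, item)

def vsim_all(vector, count=10):
    merged = []
    # Production code would query the instances concurrently so the
    # latency equals the slowest reply; sequential here for brevity.
    for idx, node in enumerate(nodes):
        reply = node.execute_command("VSIM", f"key{idx}", "VALUES",
                                     len(vector), *vector,
                                     "WITHSCORES", "COUNT", count)
        merged += [(reply[i], float(reply[i + 1]))
                   for i in range(0, len(reply), 2)]
    # The union ordered by similarity score is equivalent to querying
    # a single key holding all the items.
    return sorted(merged, key=lambda pair: pair[1], reverse=True)[:count]
```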
|
||||
|
||||
There are a few interesting facts to note about this pattern:
|
||||
|
||||
1. It is possible to have a logical vector set that is as big as the sum of all the Redis instances we are using.
|
||||
2. Deletion operations remain simple: we can hash the item and select the key where it belongs.
|
||||
3. However, even if I use 10 different Redis instances, I'm not going to reach 10x the **read** operations per second, compared to using a single server: for each logical query, I need to query all the instances. Yet, smaller graphs are faster to navigate, so there is some win even from the point of view of CPU usage.
|
||||
4. Insertions, that is **write** queries, scale linearly: I can add N items against N instances at the same time, splitting the insertion load evenly. This is very important since vector sets, being based on the HNSW data structure, are slower at adding items than at querying similar items, by a very big factor.
|
||||
5. While it cannot always guarantee the best results, with proper timeout management this system may be considered *highly available*: if a subset of the N instances is reachable, I'll still be able to return items similar to my query vector.
|
||||
|
||||
Notably, this pattern can be implemented in a way that avoids paying the sum of the round trip times with all the servers: it is possible to send the queries to all the instances at the same time, so that the latency will be equal to the slowest reply out of the N server queries.
|
||||
|
||||
# Optimizing memory usage
|
||||
|
||||
Vector Sets, or rather HNSW, the underlying data structure used by Vector Sets, combined with the features provided by Vector Sets themselves (quantization, random projection, filtering, ...) form an implementation with a non-trivial space of tunable parameters. Despite the complexity of the implementation and of vector similarity problems in general, here is a list of simple ideas that can guide the user to pick the best settings:
|
||||
|
||||
* 8 bit quantization (the default) is almost always a win. It reduces the memory usage of vectors by a factor of 4, yet the performance penalty in terms of recall is minimal. It also reduces insertion and search time by around 2 times or more.
|
||||
* Binary quantization is much more extreme: it makes vector sets a lot faster, but increases the recall error in a noticeable way, for instance from 95% to 80% with all other parameters unchanged. Yet, the speedup is really big, and the memory usage of vectors, compared to full precision vectors, is 32 times smaller.
|
||||
* Vector storage is not the only contributor to the high per-entry memory usage of Vector Sets: nodes contain, on average, `M*2 + M*0.33` pointers, where M is 16 by default (but can be tuned in `VADD`, see the `M` option). Each node also stores the string item and the optional JSON attributes: those should be as small as possible in order to avoid contributing more to the memory usage (a back-of-envelope estimate is sketched after this list).
|
||||
* The `M` parameter should be increased to 32 or more only when a near perfect recall is really needed.
|
||||
* It is possible to gain space (less memory usage) sacrificing time (more CPU time) by using a low `M` (the default of 16, for instance) and a high `EF` (the effort parameter of `VSIM`) in order to scan the graph more deeply.
|
||||
* When memory usage is a serious concern, and there is the suspicion that the vectors we are storing don't contain enough information, at least for our use case, to justify the number of components they feature, random projection (the `REDUCE` option of `VADD`) can be tested to see whether dimensionality reduction is possible with acceptable precision loss.
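A hedged back-of-envelope estimate based on the figures above (the per-node pointer average is an approximation taken from this section; real overhead also includes allocator and struct costs):

```
# Rough per-node memory estimate (assumptions: averages quoted above,
# 8-byte pointers, 1 byte per component with the default int8 quantization).
def node_memory_estimate(dim, m=16, quant_bytes=1):
    pointers = m * 2 + m * 0.33      # average links per node
    graph = pointers * 8             # pointer storage
    vector = dim * quant_bytes       # quantized vector storage
    return graph + vector

print(node_memory_estimate(300))     # ~598 bytes, excluding item string/attributes
```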
|
||||
|
||||
## Random projection tradeoffs
|
||||
|
||||
Sometimes learned vectors are not as information dense as we might guess: that
|
||||
is, there are components having similar meanings in the space, and components
|
||||
having values that don't really represent features that matter in our use case.
|
||||
|
||||
At the same time, certain vectors are very big, 1024 components or more. In these cases, it is possible to use the random projection feature of Redis Vector Sets in order to reduce both space (less RAM used) and time (more operations per second). The feature is accessible via the `REDUCE` option of the `VADD` command. However, keep in mind that you need to test how much the reduction impacts the performance of your vectors in terms of recall and quality of the results you get back.
|
||||
|
||||
## What is a random projection?
|
||||
|
||||
The concept of Random Projection is relatively simple to grasp. For instance, a projection that turns a 100-component vector into a 10-component vector will compute a different linear combination of the 100 original components for each of the 10 target components. Please note that *each of the target components* will get some random amount of all the 100 original components. It is mathematically proven that this process results in a vector space where elements still have similar distances among them, even if some information is lost.
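As an illustrative sketch only (the module's actual projection matrix and its distribution are internal details, so the Gaussian matrix below is an assumption), this is the idea in a few lines of Python:

```
# Illustrative random projection: 100 components -> 10 components
# (assumption: Gaussian matrix; the module may use a different one).
import numpy as np

rng = np.random.default_rng(42)
R = rng.standard_normal((10, 100)) / np.sqrt(10)  # fixed projection matrix
x = rng.standard_normal(100)                      # original vector
y = R @ x                                         # reduced vector
# By the Johnson-Lindenstrauss lemma, distances between projected
# vectors approximately preserve the original distances.
```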
|
||||
|
||||
## Examples of projections and loss of precision
|
||||
|
||||
To show a somewhat extreme case, let's take the 3 million Word2Vec items and compress them from 300 to 100, 50 and 25 component vectors. Then, we check the recall against the ground truth for each of the vector sets produced in this way (using different `REDUCE` parameters of `VADD`). This is the result, obtained by asking for the top 10 elements.
|
||||
|
||||
```
|
||||
----------------------------------------------------------------------
|
||||
Key Average Recall % Std Dev
|
||||
----------------------------------------------------------------------
|
||||
word_embeddings_int8 95.98 12.14
|
||||
^ This is the same key used for ground truth, but without TRUTH option
|
||||
word_embeddings_reduced_100 40.20 20.13
|
||||
word_embeddings_reduced_50 24.42 16.89
|
||||
word_embeddings_reduced_25 14.31 9.99
|
||||
```
|
||||
|
||||
Here the dimensionality reduction we are using is quite extreme: going from 300 to 100 components means that 66.6% of the original information is lost. The recall drops from 96% to 40%, down to 24% and 14% for even more extreme dimensionality reductions.
|
||||
|
||||
Reducing the dimension of vectors that are already relatively small, like the 300-component vectors of the above example, will provide only relatively small memory savings, especially because by default Vector Sets use `int8` quantization, which uses only one byte per component:
|
||||
|
||||
```
|
||||
> MEMORY USAGE word_embeddings_int8
|
||||
(integer) 3107002888
|
||||
> MEMORY USAGE word_embeddings_reduced_100
|
||||
(integer) 2507122888
|
||||
```
|
||||
|
||||
Of course, going, for example, from 2048-component vectors to 1024 would provide a much more significant memory saving, even with the `int8` quantization used by Vector Sets, assuming the recall loss is acceptable. Other than the memory saving, there is also the reduction in CPU time, translating to more operations per second.
|
||||
|
||||
Another thing to note is that, with certain embedding models, binary quantization (which offers an 8x reduction of memory usage compared to 8 bit quantization, and a very big speedup in computation) performs much better than reducing the dimension of vectors by the same amount via random projections:
|
||||
|
||||
```
|
||||
word_embeddings_bin 35.48 19.78
|
||||
```
|
||||
|
||||
Here, in the same test as above, we have a 35% recall, which is not too far from the 40% obtained with a random projection from 300 to 100 components. However, while the projection reduces the size by 3 times, binary quantization reduces it by 8 times.
|
||||
|
||||
```
|
||||
> memory usage word_embeddings_bin
|
||||
(integer) 2327002888
|
||||
```
|
||||
|
||||
In this specific case the key uses JSON attributes and has a graph connection overhead that is much bigger than the 300 bits each vector takes, but, as already said, for big vectors (1024 components, for instance) or for lower values of `M` (see `VADD`: the `M` parameter controls the level of connectivity, so it changes the number of pointers used per node) the memory saving is much stronger.
|
||||
|
||||
# Vector Sets troubleshooting and understandability
|
||||
|
||||
## Debugging poor recall or unexpected results
|
||||
|
||||
Vector graphs and similarity queries pose many challenges mainly due to the following three problems:
|
||||
|
||||
1. The error due to the approximated nature of Vector Sets is hard to evaluate.
|
||||
2. The error added by the quantization often depends on the exact vector space (the embedding we are using **and** how far apart the elements we represent in such embedding are).
|
||||
3. We live in the illusion that learned embeddings capture the best similarity possible among elements, which is obviously not always true, and highly application dependent.
|
||||
|
||||
The only way to debug such problems is to inspect, step by step, what is happening inside our application and in the structure of the HNSW graph itself. To do so, we suggest considering the following tools:
|
||||
|
||||
1. The `TRUTH` option of the `VSIM` command is able to return the ground truth of the most similar elements, without using the HNSW graph, but doing a linear scan.
|
||||
2. The `VLINKS` command allows exploring the graph to see if the connections among nodes make sense, and to investigate why a given node may be more isolated than expected. This command can also be used in a different way, when we want very fast "similar items" without paying the HNSW traversal time: it exploits the fact that we have a direct reference from each element in our vector set to each node in our HNSW graph.
|
||||
3. The `WITHSCORES` option, in the supported commands, returns a value that is directly related to the *cosine similarity* between the query and the item vectors; the similarity interval is simply rescaled from the original -1, 1 range to 0, 1, otherwise the metric is identical.
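For example, assuming the linear rescaling just described, a reported score `s` maps back to the cosine similarity as `cosine = 2*s - 1`: a score of `1` means identical vectors, `0.5` means orthogonal vectors, and `0` means opposite vectors.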
|
||||
|
||||
## Clients, latency and bandwidth usage
|
||||
|
||||
During Vector Sets testing, we discovered that clients often introduce considerable latency and CPU usage (on the client side, not in Redis) for two main reasons:
|
||||
|
||||
1. Often the serialization to `VALUES ... list of floats ...` can be very slow.
|
||||
2. The vector payload of floats represented as strings is very large, resulting in high bandwidth usage and latency, compared to other Redis commands.
|
||||
|
||||
Switching from `VALUES` to `FP32` as a method for transmitting vectors may easily provide 10-20x speedups.
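For example, a minimal sketch of sending the vector as a binary FP32 blob with redis-py (the little-endian 4-byte float layout is an assumption to verify against your server's platform):

```
# Sketch: transmit the vector as an FP32 blob instead of a list of
# string floats (assumption: little-endian IEEE 754 single precision).
import struct
import redis

r = redis.Redis()
vector = [0.1, 1.2, 0.5]
blob = struct.pack(f"<{len(vector)}f", *vector)
r.execute_command("VADD", "mykey", "FP32", blob, "my-element")
```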
|
||||
|
||||
# Known bugs
|
||||
|
||||
* Replication code is pretty much untested, and very vanilla (replicating the commands verbatim).
|
||||
|
||||
# Implementation details
|
||||
|
||||
Vector sets are based on the `hnsw.c` implementation of the HNSW data structure with extensions for speed and functionality.
|
||||
|
||||
The main features are:
|
||||
|
||||
* Proper nodes deletion with relinking.
|
||||
* 8 bits and binary quantization.
|
||||
* Threaded queries.
|
||||
* Filtered search with predicate callback.
|
|
@ -0,0 +1,383 @@
|
|||
{
|
||||
"VADD": {
|
||||
"summary": "Add one or more elements to a vector set, or update its vector if it already exists",
|
||||
"complexity": "O(log(N)) for each element added, where N is the number of elements in the vector set.",
|
||||
"group": "vector_set",
|
||||
"since": "1.0.0",
|
||||
"arity": -1,
|
||||
"function": "vaddCommand",
|
||||
"arguments": [
|
||||
{
|
||||
"name": "key",
|
||||
"type": "key"
|
||||
},
|
||||
{
|
||||
"token": "REDUCE",
|
||||
"name": "reduce",
|
||||
"type": "pure-token",
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"name": "dim",
|
||||
"type": "integer",
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"name": "format",
|
||||
"type": "oneof",
|
||||
"arguments": [
|
||||
{
|
||||
"name": "fp32",
|
||||
"type": "pure-token",
|
||||
"token": "FP32"
|
||||
},
|
||||
{
|
||||
"name": "values",
|
||||
"type": "pure-token",
|
||||
"token": "VALUES"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "vector",
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"name": "element",
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"token": "CAS",
|
||||
"name": "cas",
|
||||
"type": "pure-token",
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"name": "quant_type",
|
||||
"type": "oneof",
|
||||
"optional": true,
|
||||
"arguments": [
|
||||
{
|
||||
"name": "noquant",
|
||||
"type": "pure-token",
|
||||
"token": "NOQUANT"
|
||||
},
|
||||
{
|
||||
"name": "bin",
|
||||
"type": "pure-token",
|
||||
"token": "BIN"
|
||||
},
|
||||
{
|
||||
"name": "q8",
|
||||
"type": "pure-token",
|
||||
"token": "Q8"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"token": "EF",
|
||||
"name": "build-exploration-factor",
|
||||
"type": "integer",
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"token": "SETATTR",
|
||||
"name": "attributes",
|
||||
"type": "string",
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"token": "M",
|
||||
"name": "numlinks",
|
||||
"type": "integer",
|
||||
"optional": true
|
||||
}
|
||||
],
|
||||
"command_flags": [
|
||||
"WRITE",
|
||||
"DENYOOM"
|
||||
]
|
||||
},
|
||||
"VREM": {
|
||||
"summary": "Remove one or more elements from a vector set",
|
||||
"complexity": "O(log(N)) for each element removed, where N is the number of elements in the vector set.",
|
||||
"group": "vector_set",
|
||||
"since": "1.0.0",
|
||||
"arity": -2,
|
||||
"function": "vremCommand",
|
||||
"command_flags": [
|
||||
"WRITE"
|
||||
],
|
||||
"arguments": [
|
||||
{
|
||||
"name": "key",
|
||||
"type": "key"
|
||||
},
|
||||
{
|
||||
"name": "element",
|
||||
"type": "string",
|
||||
"multiple": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"VSIM": {
|
||||
"summary": "Return elements by vector similarity",
|
||||
"complexity": "O(log(N)) where N is the number of elements in the vector set.",
|
||||
"group": "vector_set",
|
||||
"since": "1.0.0",
|
||||
"arity": -3,
|
||||
"function": "vsimCommand",
|
||||
"command_flags": [
|
||||
"READONLY"
|
||||
],
|
||||
"arguments": [
|
||||
{
|
||||
"name": "key",
|
||||
"type": "key"
|
||||
},
|
||||
{
|
||||
"name": "format",
|
||||
"type": "oneof",
|
||||
"arguments": [
|
||||
{
|
||||
"name": "ele",
|
||||
"type": "pure-token",
|
||||
"token": "ELE"
|
||||
},
|
||||
{
|
||||
"name": "fp32",
|
||||
"type": "pure-token",
|
||||
"token": "FP32"
|
||||
},
|
||||
{
|
||||
"name": "values",
|
||||
"type": "pure-token",
|
||||
"token": "VALUES"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "vector_or_element",
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"token": "WITHSCORES",
|
||||
"name": "withscores",
|
||||
"type": "pure-token",
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"token": "COUNT",
|
||||
"name": "count",
|
||||
"type": "integer",
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"token": "EPSILON",
|
||||
"name": "max_distance",
|
||||
"type": "double",
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"token": "EF",
|
||||
"name": "search-exploration-factor",
|
||||
"type": "integer",
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"token": "FILTER",
|
||||
"name": "expression",
|
||||
"type": "string",
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"token": "FILTER-EF",
|
||||
"name": "max-filtering-effort",
|
||||
"type": "integer",
|
||||
"optional": true
|
||||
},
|
||||
{
|
||||
"token": "TRUTH",
|
||||
"name": "truth",
|
||||
"type": "pure-token",
|
||||
"optional": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"VDIM": {
|
||||
"summary": "Return the dimension of vectors in the vector set",
|
||||
"complexity": "O(1)",
|
||||
"group": "vector_set",
|
||||
"since": "1.0.0",
|
||||
"arity": 2,
|
||||
"function": "vdimCommand",
|
||||
"command_flags": [
|
||||
"READONLY",
|
||||
"FAST"
|
||||
],
|
||||
"arguments": [
|
||||
{
|
||||
"name": "key",
|
||||
"type": "key"
|
||||
}
|
||||
]
|
||||
},
|
||||
"VCARD": {
|
||||
"summary": "Return the number of elements in a vector set",
|
||||
"complexity": "O(1)",
|
||||
"group": "vector_set",
|
||||
"since": "1.0.0",
|
||||
"arity": 2,
|
||||
"function": "vcardCommand",
|
||||
"command_flags": [
|
||||
"READONLY",
|
||||
"FAST"
|
||||
],
|
||||
"arguments": [
|
||||
{
|
||||
"name": "key",
|
||||
"type": "key"
|
||||
}
|
||||
]
|
||||
},
|
||||
"VEMB": {
|
||||
"summary": "Return the vector associated with an element",
|
||||
"complexity": "O(1)",
|
||||
"group": "vector_set",
|
||||
"since": "1.0.0",
|
||||
"arity": -3,
|
||||
"function": "vembCommand",
|
||||
"command_flags": [
|
||||
"READONLY"
|
||||
],
|
||||
"arguments": [
|
||||
{
|
||||
"name": "key",
|
||||
"type": "key"
|
||||
},
|
||||
{
|
||||
"name": "element",
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"token": "RAW",
|
||||
"name": "raw",
|
||||
"type": "pure-token",
|
||||
"optional": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"VLINKS": {
|
||||
"summary": "Return the neighbors of an element at each layer in the HNSW graph",
|
||||
"complexity": "O(1)",
|
||||
"group": "vector_set",
|
||||
"since": "1.0.0",
|
||||
"arity": -3,
|
||||
"function": "vlinksCommand",
|
||||
"command_flags": [
|
||||
"READONLY"
|
||||
],
|
||||
"arguments": [
|
||||
{
|
||||
"name": "key",
|
||||
"type": "key"
|
||||
},
|
||||
{
|
||||
"name": "element",
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"token": "WITHSCORES",
|
||||
"name": "withscores",
|
||||
"type": "pure-token",
|
||||
"optional": true
|
||||
}
|
||||
]
|
||||
},
|
||||
"VINFO": {
|
||||
"summary": "Return information about a vector set",
|
||||
"complexity": "O(1)",
|
||||
"group": "vector_set",
|
||||
"since": "1.0.0",
|
||||
"arity": 2,
|
||||
"function": "vinfoCommand",
|
||||
"command_flags": [
|
||||
"READONLY",
|
||||
"FAST"
|
||||
],
|
||||
"arguments": [
|
||||
{
|
||||
"name": "key",
|
||||
"type": "key"
|
||||
}
|
||||
]
|
||||
},
|
||||
"VSETATTR": {
|
||||
"summary": "Associate or remove the JSON attributes of elements",
|
||||
"complexity": "O(1)",
|
||||
"group": "vector_set",
|
||||
"since": "1.0.0",
|
||||
"arity": 4,
|
||||
"function": "vsetattrCommand",
|
||||
"command_flags": [
|
||||
"WRITE"
|
||||
],
|
||||
"arguments": [
|
||||
{
|
||||
"name": "key",
|
||||
"type": "key"
|
||||
},
|
||||
{
|
||||
"name": "element",
|
||||
"type": "string"
|
||||
},
|
||||
{
|
||||
"name": "json",
|
||||
"type": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
"VGETATTR": {
|
||||
"summary": "Retrieve the JSON attributes of elements",
|
||||
"complexity": "O(1)",
|
||||
"group": "vector_set",
|
||||
"since": "1.0.0",
|
||||
"arity": 3,
|
||||
"function": "vgetattrCommand",
|
||||
"command_flags": [
|
||||
"READONLY"
|
||||
],
|
||||
"arguments": [
|
||||
{
|
||||
"name": "key",
|
||||
"type": "key"
|
||||
},
|
||||
{
|
||||
"name": "element",
|
||||
"type": "string"
|
||||
}
|
||||
]
|
||||
},
|
||||
"VRANDMEMBER": {
|
||||
"summary": "Return one or multiple random members from a vector set",
|
||||
"complexity": "O(N) where N is the absolute value of the count argument.",
|
||||
"group": "vector_set",
|
||||
"since": "1.0.0",
|
||||
"arity": -2,
|
||||
"function": "vrandmemberCommand",
|
||||
"command_flags": [
|
||||
"READONLY"
|
||||
],
|
||||
"arguments": [
|
||||
{
|
||||
"name": "key",
|
||||
"type": "key"
|
||||
},
|
||||
{
|
||||
"name": "count",
|
||||
"type": "integer",
|
||||
"optional": true
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
venv
|
|
@ -0,0 +1,44 @@
|
|||
This tool is similar to redis-cli (but very basic); it allows
|
||||
you to specify arguments that are expanded into vectors by calling
|
||||
Ollama to get the embedding.
|
||||
|
||||
Whatever is passed as !"foo bar" gets expanded into
|
||||
VALUES ... embedding ...
|
||||
|
||||
You must have Ollama running with the mxbai-embed-large model
|
||||
already installed for this to work.
|
||||
|
||||
Example:
|
||||
|
||||
redis> KEYS *
|
||||
1) food_items
|
||||
2) glove_embeddings_bin
|
||||
3) many_movies_mxbai-embed-large_BIN
|
||||
4) many_movies_mxbai-embed-large_NOQUANT
|
||||
5) word_embeddings
|
||||
6) word_embeddings_bin
|
||||
7) glove_embeddings_fp32
|
||||
|
||||
redis> VSIM food_items !"drinks with fruit"
|
||||
1) (Fruit)Juices,Lemonade,100ml,50 cal,210 kJ
|
||||
2) (Fruit)Juices,Limeade,100ml,128 cal,538 kJ
|
||||
3) CannedFruit,Canned Fruit Cocktail,100g,81 cal,340 kJ
|
||||
4) (Fruit)Juices,Energy-Drink,100ml,87 cal,365 kJ
|
||||
5) Fruits,Lime,100g,30 cal,126 kJ
|
||||
6) (Fruit)Juices,Coconut Water,100ml,19 cal,80 kJ
|
||||
7) Fruits,Lemon,100g,29 cal,122 kJ
|
||||
8) (Fruit)Juices,Clamato,100ml,60 cal,252 kJ
|
||||
9) Fruits,Fruit salad,100g,50 cal,210 kJ
|
||||
10) (Fruit)Juices,Capri-Sun,100ml,41 cal,172 kJ
|
||||
|
||||
redis> vsim food_items !"barilla"
|
||||
1) Pasta&Noodles,Spirelli,100g,367 cal,1541 kJ
|
||||
2) Pasta&Noodles,Farfalle,100g,358 cal,1504 kJ
|
||||
3) Pasta&Noodles,Capellini,100g,353 cal,1483 kJ
|
||||
4) Pasta&Noodles,Spaetzle,100g,368 cal,1546 kJ
|
||||
5) Pasta&Noodles,Cappelletti,100g,164 cal,689 kJ
|
||||
6) Pasta&Noodles,Penne,100g,351 cal,1474 kJ
|
||||
7) Pasta&Noodles,Shells,100g,353 cal,1483 kJ
|
||||
8) Pasta&Noodles,Linguine,100g,357 cal,1499 kJ
|
||||
9) Pasta&Noodles,Rotini,100g,353 cal,1483 kJ
|
||||
10) Pasta&Noodles,Rigatoni,100g,353 cal,1483 kJ
|
|
@ -0,0 +1,147 @@
|
|||
#!/usr/bin/env python3
#
|
||||
# Copyright (c) 2009-Present, Redis Ltd.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Licensed under your choice of (a) the Redis Source Available License 2.0
|
||||
# (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
|
||||
# GNU Affero General Public License v3 (AGPLv3).
|
||||
#
|
||||
|
||||
|
||||
import redis
|
||||
import requests
|
||||
import re
|
||||
import shlex
|
||||
from prompt_toolkit import PromptSession
|
||||
from prompt_toolkit.history import InMemoryHistory
|
||||
|
||||
def get_embedding(text):
|
||||
"""Get embedding from local Ollama API"""
|
||||
url = "http://localhost:11434/api/embeddings"
|
||||
payload = {
|
||||
"model": "mxbai-embed-large",
|
||||
"prompt": text
|
||||
}
|
||||
try:
|
||||
response = requests.post(url, json=payload)
|
||||
response.raise_for_status()
|
||||
return response.json()['embedding']
|
||||
except requests.exceptions.RequestException as e:
|
||||
raise Exception(f"Failed to get embedding: {str(e)}")
|
||||
|
||||
def process_embedding_patterns(text):
|
||||
"""Process !"text" and !!"text" patterns in the command"""
|
||||
|
||||
def replace_with_embedding(match):
|
||||
text = match.group(1)
|
||||
embedding = get_embedding(text)
|
||||
return f"VALUES {len(embedding)} {' '.join(map(str, embedding))}"
|
||||
|
||||
def replace_with_embedding_and_text(match):
|
||||
text = match.group(1)
|
||||
embedding = get_embedding(text)
|
||||
# Return both the embedding values and the original text as next argument
|
||||
return f'VALUES {len(embedding)} {" ".join(map(str, embedding))} "{text}"'
|
||||
|
||||
# First handle !!"text" pattern (must be done before !"text")
|
||||
text = re.sub(r'!!"([^"]*)"', replace_with_embedding_and_text, text)
|
||||
# Then handle !"text" pattern
|
||||
text = re.sub(r'!"([^"]*)"', replace_with_embedding, text)
|
||||
return text
|
||||
|
||||
def parse_command(command):
|
||||
"""Parse command respecting quoted strings"""
|
||||
try:
|
||||
# Use shlex to properly handle quoted strings
|
||||
return shlex.split(command)
|
||||
except ValueError as e:
|
||||
raise Exception(f"Invalid command syntax: {str(e)}")
|
||||
|
||||
def format_response(response):
|
||||
"""Format the response to match Redis protocol style"""
|
||||
if response is None:
|
||||
return "(nil)"
|
||||
elif isinstance(response, bool):
|
||||
return "+OK" if response else "(error) Operation failed"
|
||||
elif isinstance(response, (list, set)):
|
||||
if not response:
|
||||
return "(empty list or set)"
|
||||
return "\n".join(f"{i+1}) {item}" for i, item in enumerate(response))
|
||||
elif isinstance(response, int):
|
||||
return f"(integer) {response}"
|
||||
else:
|
||||
return str(response)
|
||||
|
||||
def main():
|
||||
# Default connection to localhost:6379
|
||||
r = redis.Redis(host='localhost', port=6379, decode_responses=True)
|
||||
|
||||
try:
|
||||
# Test connection
|
||||
r.ping()
|
||||
print("Connected to Redis. Type your commands (CTRL+D to exit):")
|
||||
print("Special syntax:")
|
||||
print(" !\"text\" - Replace with embedding")
|
||||
print(" !!\"text\" - Replace with embedding and append text as value")
|
||||
print(" \"text\" - Quote strings containing spaces")
|
||||
except redis.ConnectionError:
|
||||
print("Error: Could not connect to Redis server")
|
||||
return
|
||||
|
||||
# Setup prompt session with history
|
||||
session = PromptSession(history=InMemoryHistory())
|
||||
|
||||
# Main loop
|
||||
while True:
|
||||
try:
|
||||
# Read input with line editing support
|
||||
command = session.prompt("redis> ")
|
||||
|
||||
# Skip empty commands
|
||||
if not command.strip():
|
||||
continue
|
||||
|
||||
# Process any embedding patterns before parsing
|
||||
try:
|
||||
processed_command = process_embedding_patterns(command)
|
||||
except Exception as e:
|
||||
print(f"(error) Embedding processing failed: {str(e)}")
|
||||
continue
|
||||
|
||||
# Parse the command respecting quoted strings
|
||||
try:
|
||||
parts = parse_command(processed_command)
|
||||
except Exception as e:
|
||||
print(f"(error) {str(e)}")
|
||||
continue
|
||||
|
||||
if not parts:
|
||||
continue
|
||||
|
||||
cmd = parts[0].lower()
|
||||
args = parts[1:]
|
||||
|
||||
# Execute command
|
||||
try:
|
||||
method = getattr(r, cmd, None)
|
||||
if method is not None:
|
||||
result = method(*args)
|
||||
else:
|
||||
# Use execute_command for unknown commands
|
||||
result = r.execute_command(cmd, *args)
|
||||
print(format_response(result))
|
||||
except AttributeError:
|
||||
print(f"(error) Unknown command '{cmd}'")
|
||||
|
||||
except EOFError:
|
||||
print("\nGoodbye!")
|
||||
break
|
||||
except KeyboardInterrupt:
|
||||
continue # Allow Ctrl+C to clear current line
|
||||
except redis.RedisError as e:
|
||||
print(f"(error) {str(e)}")
|
||||
except Exception as e:
|
||||
print(f"(error) {str(e)}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,3 @@
|
|||
wget http://ann-benchmarks.com/glove-100-angular.hdf5
|
||||
python insert.py
|
||||
python recall.py (use --k <count> optionally, default top-10)
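|
||||
|
||||
After insert.py completes, a quick sanity check from Python (a sketch,
|
||||
using the same key and conventions as insert.py and recall.py):
|
||||
|
||||
import redis
|
||||
r = redis.Redis(decode_responses=True)
|
||||
v = ["0.1"] * 100  # any 100-dimensional query vector
|
||||
print(r.execute_command("VSIM", "glove_embeddings", "VALUES", "100", *v, "COUNT", "5"))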
|
|
@ -0,0 +1,56 @@
|
|||
#
|
||||
# Copyright (c) 2009-Present, Redis Ltd.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Licensed under your choice of (a) the Redis Source Available License 2.0
|
||||
# (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
|
||||
# GNU Affero General Public License v3 (AGPLv3).
|
||||
#
|
||||
|
||||
import h5py
|
||||
import redis
|
||||
from tqdm import tqdm
|
||||
|
||||
# Initialize Redis connection
|
||||
redis_client = redis.Redis(host='localhost', port=6379, decode_responses=True, encoding='utf-8')
|
||||
|
||||
def add_to_redis(index, embedding):
|
||||
"""Add embedding to Redis using VADD command"""
|
||||
args = ["VADD", "glove_embeddings", "VALUES", "100"] # 100 is vector dimension
|
||||
args.extend(map(str, embedding))
|
||||
args.append(f"{index}") # Using index as identifier since we don't have words
|
||||
args.append("EF")
|
||||
args.append("200")
|
||||
# args.append("NOQUANT")
|
||||
# args.append("BIN")
|
||||
redis_client.execute_command(*args)
|
||||
|
||||
def main():
|
||||
with h5py.File('glove-100-angular.hdf5', 'r') as f:
|
||||
# Get the train dataset
|
||||
train_vectors = f['train']
|
||||
total_vectors = train_vectors.shape[0]
|
||||
|
||||
print(f"Starting to process {total_vectors} vectors...")
|
||||
|
||||
# Process in batches to avoid memory issues
|
||||
batch_size = 1000
|
||||
|
||||
for i in tqdm(range(0, total_vectors, batch_size)):
|
||||
batch_end = min(i + batch_size, total_vectors)
|
||||
batch = train_vectors[i:batch_end]
|
||||
|
||||
for j, vector in enumerate(batch):
|
||||
try:
|
||||
current_index = i + j
|
||||
add_to_redis(current_index, vector)
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing vector {current_index}: {str(e)}")
|
||||
continue
|
||||
|
||||
if (i + batch_size) % 10000 == 0:
|
||||
print(f"Processed {i + batch_size} vectors")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,87 @@
|
|||
#
|
||||
# Copyright (c) 2009-Present, Redis Ltd.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Licensed under your choice of (a) the Redis Source Available License 2.0
|
||||
# (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
|
||||
# GNU Affero General Public License v3 (AGPLv3).
|
||||
#
|
||||
|
||||
import h5py
|
||||
import redis
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
import argparse
|
||||
|
||||
# Initialize Redis connection
|
||||
redis_client = redis.Redis(host='localhost', port=6379, decode_responses=True, encoding='utf-8')
|
||||
|
||||
def get_redis_neighbors(query_vector, k):
|
||||
"""Get nearest neighbors using Redis VSIM command"""
|
||||
args = ["VSIM", "glove_embeddings_bin", "VALUES", "100"]
|
||||
args.extend(map(str, query_vector))
|
||||
args.extend(["COUNT", str(k)])
|
||||
args.extend(["EF", 100])
|
||||
if False:
|
||||
print(args)
|
||||
exit(1)
|
||||
results = redis_client.execute_command(*args)
|
||||
return [int(res) for res in results]
|
||||
|
||||
def calculate_recall(ground_truth, predicted, k):
|
||||
"""Calculate recall@k"""
|
||||
relevant = set(ground_truth[:k])
|
||||
retrieved = set(predicted[:k])
|
||||
return len(relevant.intersection(retrieved)) / len(relevant)
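|
||||
# e.g. with k=10, if 7 of the true top-10 neighbors are retrieved,
|
||||
# recall@10 is 0.7.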
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description='Evaluate Redis VSIM recall')
|
||||
parser.add_argument('--k', type=int, default=10, help='Number of neighbors to evaluate (default: 10)')
|
||||
parser.add_argument('--batch', type=int, default=100, help='Progress update frequency (default: 100)')
|
||||
args = parser.parse_args()
|
||||
|
||||
k = args.k
|
||||
batch_size = args.batch
|
||||
|
||||
with h5py.File('glove-100-angular.hdf5', 'r') as f:
|
||||
test_vectors = f['test'][:]
|
||||
ground_truth_neighbors = f['neighbors'][:]
|
||||
|
||||
num_queries = len(test_vectors)
|
||||
recalls = []
|
||||
|
||||
print(f"Evaluating recall@{k} for {num_queries} test queries...")
|
||||
|
||||
for i in tqdm(range(num_queries)):
|
||||
try:
|
||||
# Get Redis results
|
||||
redis_neighbors = get_redis_neighbors(test_vectors[i], k)
|
||||
|
||||
# Get ground truth for this query
|
||||
true_neighbors = ground_truth_neighbors[i]
|
||||
|
||||
# Calculate recall
|
||||
recall = calculate_recall(true_neighbors, redis_neighbors, k)
|
||||
recalls.append(recall)
|
||||
|
||||
if (i + 1) % batch_size == 0:
|
||||
current_avg_recall = np.mean(recalls)
|
||||
print(f"Current average recall@{k} after {i+1} queries: {current_avg_recall:.4f}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing query {i}: {str(e)}")
|
||||
continue
|
||||
|
||||
final_recall = np.mean(recalls)
|
||||
print("\nFinal Results:")
|
||||
print(f"Average recall@{k}: {final_recall:.4f}")
|
||||
print(f"Total queries evaluated: {len(recalls)}")
|
||||
|
||||
# Save detailed results
|
||||
with open(f'recall_evaluation_results_k{k}.txt', 'w') as f:
|
||||
f.write(f"Average recall@{k}: {final_recall:.4f}\n")
|
||||
f.write(f"Total queries evaluated: {len(recalls)}\n")
|
||||
f.write(f"Individual query recalls: {recalls}\n")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,2 @@
|
|||
mpst_full_data.csv
|
||||
partition.json
|
|
@ -0,0 +1,30 @@
|
|||
This example maps long-form movie plots to movie titles.
|
||||
It will create fp32 and binary vectors (the two extremes).
|
||||
|
||||
1. Install Ollama and pull the embedding model "mxbai-embed-large"
|
||||
2. Download mpst_full_data.csv from https://www.kaggle.com/datasets/cryptexcode/mpst-movie-plot-synopses-with-tags
|
||||
3. python insert.py
|
||||
|
||||
127.0.0.1:6379> VSIM many_movies_mxbai-embed-large_NOQUANT ELE "The Matrix"
|
||||
1) "The Matrix"
|
||||
2) "The Matrix Reloaded"
|
||||
3) "The Matrix Revolutions"
|
||||
4) "Commando"
|
||||
5) "Avatar"
|
||||
6) "Forbidden Planet"
|
||||
7) "Terminator Salvation"
|
||||
8) "Mandroid"
|
||||
9) "The Omega Code"
|
||||
10) "Coherence"
|
||||
|
||||
127.0.0.1:6379> VSIM many_movies_mxbai-embed-large_BIN ELE "The Matrix"
|
||||
1) "The Matrix"
|
||||
2) "The Matrix Reloaded"
|
||||
3) "The Matrix Revolutions"
|
||||
4) "The Omega Code"
|
||||
5) "Forbidden Planet"
|
||||
6) "Avatar"
|
||||
7) "John Carter"
|
||||
8) "System Shock 2"
|
||||
9) "Coherence"
|
||||
10) "Tomorrowland"
|
|
@ -0,0 +1,57 @@
|
|||
#
|
||||
# Copyright (c) 2009-Present, Redis Ltd.
|
||||
# All rights reserved.
|
||||
#
|
||||
# Licensed under your choice of (a) the Redis Source Available License 2.0
|
||||
# (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
|
||||
# GNU Affero General Public License v3 (AGPLv3).
|
||||
#
|
||||
|
||||
import csv
|
||||
import requests
|
||||
import redis
|
||||
|
||||
ModelName="mxbai-embed-large"
|
||||
|
||||
# Initialize Redis connection, setting encoding to utf-8
|
||||
redis_client = redis.Redis(host='localhost', port=6379, decode_responses=True, encoding='utf-8')
|
||||
|
||||
def get_embedding(text):
|
||||
"""Get embedding from local API"""
|
||||
url = "http://localhost:11434/api/embeddings"
|
||||
payload = {
|
||||
"model": ModelName,
|
||||
"prompt": "Represent this movie plot and genre: "+text
|
||||
}
|
||||
response = requests.post(url, json=payload)
|
||||
return response.json()['embedding']
|
||||
|
||||
def add_to_redis(title, embedding, quant_type):
|
||||
"""Add embedding to Redis using VADD command"""
|
||||
args = ["VADD", "many_movies_"+ModelName+"_"+quant_type, "VALUES", str(len(embedding))]
|
||||
args.extend(map(str, embedding))
|
||||
args.append(title)
|
||||
args.append(quant_type)
|
||||
redis_client.execute_command(*args)
|
||||
|
||||
def main():
|
||||
with open('mpst_full_data.csv', 'r', encoding='utf-8') as file:
|
||||
reader = csv.DictReader(file)
|
||||
|
||||
for movie in reader:
|
||||
try:
|
||||
text_to_embed = f"{movie['title']} {movie['plot_synopsis']} {movie['tags']}"
|
||||
|
||||
print(f"Getting embedding for: {movie['title']}")
|
||||
embedding = get_embedding(text_to_embed)
|
||||
|
||||
add_to_redis(movie['title'], embedding, "BIN")
|
||||
add_to_redis(movie['title'], embedding, "NOQUANT")
|
||||
print(f"Successfully processed: {movie['title']}")
|
||||
|
||||
except Exception as e:
|
||||
print(f"Error processing {movie['title']}: {str(e)}")
|
||||
continue
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -0,0 +1,942 @@
|
|||
/* Filtering of objects based on simple expressions.
|
||||
* This powers the FILTER option of Vector Sets, but it is otherwise
|
||||
* general code to be used when we want to tell if a given object (with fields)
|
||||
* passes or fails a given test for scalars, strings, ...
|
||||
*
|
||||
* Copyright (c) 2009-Present, Redis Ltd.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Licensed under your choice of (a) the Redis Source Available License 2.0
|
||||
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
|
||||
* GNU Affero General Public License v3 (AGPLv3).
|
||||
* Originally authored by: Salvatore Sanfilippo.
|
||||
*/
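|
||||
|
||||
/* Examples of expressions this engine accepts (the field names after
|
||||
 * the dots are hypothetical; the first line is from the test main at
|
||||
 * the bottom of this file):
|
||||
 *
|
||||
 *   (5+2)*3 and .year > 1980 and 'foo' == 'foo'
|
||||
 *   .year >= 1980 and .year < 1990
|
||||
 *   !(.title == "The Matrix") or .year in [1984, 1999]
|
||||
 */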
|
||||
|
||||
#ifdef TEST_MAIN
|
||||
#define RedisModule_Alloc malloc
|
||||
#define RedisModule_Realloc realloc
|
||||
#define RedisModule_Free free
|
||||
#define RedisModule_Strdup strdup
|
||||
#define RedisModule_Assert assert
|
||||
#define _DEFAULT_SOURCE
|
||||
#define _USE_MATH_DEFINES
|
||||
#define _POSIX_C_SOURCE 200809L
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#define EXPR_TOKEN_EOF 0
|
||||
#define EXPR_TOKEN_NUM 1
|
||||
#define EXPR_TOKEN_STR 2
|
||||
#define EXPR_TOKEN_TUPLE 3
|
||||
#define EXPR_TOKEN_SELECTOR 4
|
||||
#define EXPR_TOKEN_OP 5
|
||||
#define EXPR_TOKEN_NULL 6
|
||||
|
||||
#define EXPR_OP_OPAREN 0 /* ( */
|
||||
#define EXPR_OP_CPAREN 1 /* ) */
|
||||
#define EXPR_OP_NOT 2 /* ! */
|
||||
#define EXPR_OP_POW 3 /* ** */
|
||||
#define EXPR_OP_MULT 4 /* * */
|
||||
#define EXPR_OP_DIV 5 /* / */
|
||||
#define EXPR_OP_MOD 6 /* % */
|
||||
#define EXPR_OP_SUM 7 /* + */
|
||||
#define EXPR_OP_DIFF 8 /* - */
|
||||
#define EXPR_OP_GT 9 /* > */
|
||||
#define EXPR_OP_GTE 10 /* >= */
|
||||
#define EXPR_OP_LT 11 /* < */
|
||||
#define EXPR_OP_LTE 12 /* <= */
|
||||
#define EXPR_OP_EQ 13 /* == */
|
||||
#define EXPR_OP_NEQ 14 /* != */
|
||||
#define EXPR_OP_IN 15 /* in */
|
||||
#define EXPR_OP_AND 16 /* and */
|
||||
#define EXPR_OP_OR 17 /* or */
|
||||
|
||||
/* This structure represents a token in our expression. It is either
|
||||
 * a literal like 4 or "foo", an operator like "+", "-", "and", or a
|
||||
 * JSON selector, which starts with a dot: ".age", ".properties.somearray[1]" */
|
||||
typedef struct exprtoken {
|
||||
int refcount; // Reference counting for memory reclaiming.
|
||||
int token_type; // Token type of the just parsed token.
|
||||
int offset; // Chars offset in expression.
|
||||
union {
|
||||
double num; // Value for EXPR_TOKEN_NUM.
|
||||
struct {
|
||||
char *start; // String pointer for EXPR_TOKEN_STR / SELECTOR.
|
||||
size_t len; // String len for EXPR_TOKEN_STR / SELECTOR.
|
||||
char *heapstr; // Non-NULL if we have a private allocation for this
|
||||
// string. When possible, it just references the
|
||||
// string expression we compiled, exprstate->expr.
|
||||
} str;
|
||||
int opcode; // Opcode ID for EXPR_TOKEN_OP.
|
||||
struct {
|
||||
struct exprtoken **ele;
|
||||
size_t len;
|
||||
} tuple; // Tuples are like [1, 2, 3] for "in" operator.
|
||||
};
|
||||
} exprtoken;
|
||||
|
||||
/* Simple stack of expr tokens. This is used both to represent the stack
|
||||
* of values and the stack of operands during VM execution. */
|
||||
typedef struct exprstack {
|
||||
exprtoken **items;
|
||||
int numitems;
|
||||
int allocsize;
|
||||
} exprstack;
|
||||
|
||||
typedef struct exprstate {
|
||||
char *expr; /* Expression string to compile. Note that
|
||||
* expression token strings point directly to this
|
||||
* string. */
|
||||
char *p; // Current position inside 'expr', while parsing.
|
||||
|
||||
// Virtual machine state.
|
||||
exprstack values_stack;
|
||||
exprstack ops_stack; // Operator stack used during compilation.
|
||||
exprstack tokens; // Expression processed into a sequence of tokens.
|
||||
exprstack program; // Expression compiled into opcodes and values.
|
||||
} exprstate;
|
||||
|
||||
/* Valid operators. */
|
||||
struct {
|
||||
char *opname;
|
||||
int oplen;
|
||||
int opcode;
|
||||
int precedence;
|
||||
int arity;
|
||||
} ExprOptable[] = {
|
||||
{"(", 1, EXPR_OP_OPAREN, 7, 0},
|
||||
{")", 1, EXPR_OP_CPAREN, 7, 0},
|
||||
{"!", 1, EXPR_OP_NOT, 6, 1},
|
||||
{"not", 3, EXPR_OP_NOT, 6, 1},
|
||||
{"**", 2, EXPR_OP_POW, 5, 2},
|
||||
{"*", 1, EXPR_OP_MULT, 4, 2},
|
||||
{"/", 1, EXPR_OP_DIV, 4, 2},
|
||||
{"%", 1, EXPR_OP_MOD, 4, 2},
|
||||
{"+", 1, EXPR_OP_SUM, 3, 2},
|
||||
{"-", 1, EXPR_OP_DIFF, 3, 2},
|
||||
{">", 1, EXPR_OP_GT, 2, 2},
|
||||
{">=", 2, EXPR_OP_GTE, 2, 2},
|
||||
{"<", 1, EXPR_OP_LT, 2, 2},
|
||||
{"<=", 2, EXPR_OP_LTE, 2, 2},
|
||||
{"==", 2, EXPR_OP_EQ, 2, 2},
|
||||
{"!=", 2, EXPR_OP_NEQ, 2, 2},
|
||||
{"in", 2, EXPR_OP_IN, 2, 2},
|
||||
{"and", 3, EXPR_OP_AND, 1, 2},
|
||||
{"&&", 2, EXPR_OP_AND, 1, 2},
|
||||
{"or", 2, EXPR_OP_OR, 0, 2},
|
||||
{"||", 2, EXPR_OP_OR, 0, 2},
|
||||
{NULL, 0, 0, 0, 0} // Terminator.
|
||||
};
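|
||||
|
||||
/* Note: higher precedence numbers bind tighter, so "1+2*3" parses as
|
||||
 * "1+(2*3)" and ".a or .b and .c" as ".a or (.b and .c)". The "**"
|
||||
 * operator is made right-associative in exprProcessOperator(). */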
|
||||
|
||||
#define EXPR_OP_SPECIALCHARS "+-*%/!()<>=|&"
|
||||
#define EXPR_SELECTOR_SPECIALCHARS "_-"
|
||||
|
||||
/* ================================ Expr token ============================== */
|
||||
|
||||
/* Return a heap-allocated token of the specified type, setting the
|
||||
* reference count to 1. */
|
||||
exprtoken *exprNewToken(int type) {
|
||||
exprtoken *t = RedisModule_Alloc(sizeof(exprtoken));
|
||||
memset(t,0,sizeof(*t));
|
||||
t->token_type = type;
|
||||
t->refcount = 1;
|
||||
return t;
|
||||
}
|
||||
|
||||
/* Release a reference to a token. When the refcount drops to zero,
|
||||
 * the token is freed together with any private allocations it owns
|
||||
 * (heap strings, tuple elements). */
|
||||
void exprTokenRelease(exprtoken *t) {
|
||||
if (t == NULL) return;
|
||||
|
||||
RedisModule_Assert(t->refcount > 0); // Catch double free & more.
|
||||
t->refcount--;
|
||||
if (t->refcount > 0) return;
|
||||
|
||||
// We reached refcount 0: free the object.
|
||||
if (t->token_type == EXPR_TOKEN_STR) {
|
||||
if (t->str.heapstr != NULL) RedisModule_Free(t->str.heapstr);
|
||||
} else if (t->token_type == EXPR_TOKEN_TUPLE) {
|
||||
for (size_t j = 0; j < t->tuple.len; j++)
|
||||
exprTokenRelease(t->tuple.ele[j]);
|
||||
if (t->tuple.ele) RedisModule_Free(t->tuple.ele);
|
||||
}
|
||||
RedisModule_Free(t);
|
||||
}
|
||||
|
||||
void exprTokenRetain(exprtoken *t) {
|
||||
t->refcount++;
|
||||
}
|
||||
|
||||
/* ============================== Stack handling ============================ */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#define EXPR_STACK_INITIAL_SIZE 16
|
||||
|
||||
/* Initialize a new expression stack. */
|
||||
void exprStackInit(exprstack *stack) {
|
||||
stack->items = RedisModule_Alloc(sizeof(exprtoken*) * EXPR_STACK_INITIAL_SIZE);
|
||||
stack->numitems = 0;
|
||||
stack->allocsize = EXPR_STACK_INITIAL_SIZE;
|
||||
}
|
||||
|
||||
/* Push a token pointer onto the stack. Does not increment the refcount
|
||||
 * of the token: it is up to the caller to do this. */
|
||||
void exprStackPush(exprstack *stack, exprtoken *token) {
|
||||
/* Check if we need to grow the stack. */
|
||||
if (stack->numitems == stack->allocsize) {
|
||||
size_t newsize = stack->allocsize * 2;
|
||||
exprtoken **newitems =
|
||||
RedisModule_Realloc(stack->items, sizeof(exprtoken*) * newsize);
|
||||
stack->items = newitems;
|
||||
stack->allocsize = newsize;
|
||||
}
|
||||
stack->items[stack->numitems] = token;
|
||||
stack->numitems++;
|
||||
}
|
||||
|
||||
/* Pop a token pointer from the stack. Return NULL if the stack is
|
||||
 * empty. Does NOT decrement the refcount of the token; it's up to the
|
||||
* caller to do so, as the new owner of the reference. */
|
||||
exprtoken *exprStackPop(exprstack *stack) {
|
||||
if (stack->numitems == 0) return NULL;
|
||||
stack->numitems--;
|
||||
return stack->items[stack->numitems];
|
||||
}
|
||||
|
||||
/* Just return the last element pushed, without consuming it or altering
|
||||
* the reference count. */
|
||||
exprtoken *exprStackPeek(exprstack *stack) {
|
||||
if (stack->numitems == 0) return NULL;
|
||||
return stack->items[stack->numitems-1];
|
||||
}
|
||||
|
||||
/* Free the stack structure state, including the items it contains, that are
|
||||
* assumed to be heap allocated. The passed pointer itself is not freed. */
|
||||
void exprStackFree(exprstack *stack) {
|
||||
for (int j = 0; j < stack->numitems; j++)
|
||||
exprTokenRelease(stack->items[j]);
|
||||
RedisModule_Free(stack->items);
|
||||
}
|
||||
|
||||
/* Just reset the stack removing all the items, but leaving it in a state
|
||||
* that makes it still usable for new elements. */
|
||||
void exprStackReset(exprstack *stack) {
|
||||
for (int j = 0; j < stack->numitems; j++)
|
||||
exprTokenRelease(stack->items[j]);
|
||||
stack->numitems = 0;
|
||||
}
|
||||
|
||||
/* =========================== Expression compilation ======================= */
|
||||
|
||||
void exprConsumeSpaces(exprstate *es) {
|
||||
while(es->p[0] && isspace(es->p[0])) es->p++;
|
||||
}
|
||||
|
||||
/* Parse an operator or a literal (just "null" currently).
|
||||
* When parsing operators, the function will try to match the longest match
|
||||
* in the operators table. */
|
||||
exprtoken *exprParseOperatorOrLiteral(exprstate *es) {
|
||||
exprtoken *t = exprNewToken(EXPR_TOKEN_OP);
|
||||
char *start = es->p;
|
||||
|
||||
while(es->p[0] &&
|
||||
(isalpha(es->p[0]) ||
|
||||
strchr(EXPR_OP_SPECIALCHARS,es->p[0]) != NULL))
|
||||
{
|
||||
es->p++;
|
||||
}
|
||||
|
||||
int matchlen = es->p - start;
|
||||
int bestlen = 0;
|
||||
int j;
|
||||
|
||||
// Check if it's a literal.
|
||||
if (matchlen == 4 && !memcmp("null",start,4)) {
|
||||
t->token_type = EXPR_TOKEN_NULL;
|
||||
return t;
|
||||
}
|
||||
|
||||
// Find the longest matching operator.
|
||||
for (j = 0; ExprOptable[j].opname != NULL; j++) {
|
||||
if (ExprOptable[j].oplen > matchlen) continue;
|
||||
if (memcmp(ExprOptable[j].opname, start, ExprOptable[j].oplen) != 0)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if (ExprOptable[j].oplen > bestlen) {
|
||||
t->opcode = ExprOptable[j].opcode;
|
||||
bestlen = ExprOptable[j].oplen;
|
||||
}
|
||||
}
|
||||
if (bestlen == 0) {
|
||||
exprTokenRelease(t);
|
||||
return NULL;
|
||||
} else {
|
||||
es->p = start + bestlen;
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
// Valid selector charset.
|
||||
static int is_selector_char(int c) {
|
||||
return (isalpha(c) ||
|
||||
isdigit(c) ||
|
||||
strchr(EXPR_SELECTOR_SPECIALCHARS,c) != NULL);
|
||||
}
|
||||
|
||||
/* Parse selectors: they start with a dot and may contain alphanumeric
|
||||
 * characters or a few special chars. */
|
||||
exprtoken *exprParseSelector(exprstate *es) {
|
||||
exprtoken *t = exprNewToken(EXPR_TOKEN_SELECTOR);
|
||||
es->p++; // Skip dot.
|
||||
char *start = es->p;
|
||||
|
||||
while(es->p[0] && is_selector_char(es->p[0])) es->p++;
|
||||
int matchlen = es->p - start;
|
||||
t->str.start = start;
|
||||
t->str.len = matchlen;
|
||||
return t;
|
||||
}
|
||||
|
||||
exprtoken *exprParseNumber(exprstate *es) {
|
||||
exprtoken *t = exprNewToken(EXPR_TOKEN_NUM);
|
||||
char num[256];
|
||||
int idx = 0;
|
||||
while(isdigit(es->p[0]) || es->p[0] == '.' || es->p[0] == 'e' ||
|
||||
      es->p[0] == 'E' || (idx == 0 && es->p[0] == '-') ||
|
||||
      ((es->p[0] == '-' || es->p[0] == '+') && idx > 0 &&
|
||||
       (num[idx-1] == 'e' || num[idx-1] == 'E'))) // Allow exponent sign, e.g. 1e-5.
|
||||
{
|
||||
if (idx >= (int)sizeof(num)-1) {
|
||||
exprTokenRelease(t);
|
||||
return NULL;
|
||||
}
|
||||
num[idx++] = es->p[0];
|
||||
es->p++;
|
||||
}
|
||||
num[idx] = 0;
|
||||
|
||||
char *endptr;
|
||||
t->num = strtod(num, &endptr);
|
||||
if (*endptr != '\0') {
|
||||
exprTokenRelease(t);
|
||||
return NULL;
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
exprtoken *exprParseString(exprstate *es) {
|
||||
char quote = es->p[0]; /* Store the quote type (' or "). */
|
||||
es->p++; /* Skip opening quote. */
|
||||
|
||||
exprtoken *t = exprNewToken(EXPR_TOKEN_STR);
|
||||
t->str.start = es->p;
|
||||
|
||||
while(es->p[0] != '\0') {
|
||||
if (es->p[0] == '\\' && es->p[1] != '\0') {
|
||||
es->p += 2; // Skip escaped char.
|
||||
continue;
|
||||
}
|
||||
if (es->p[0] == quote) {
|
||||
t->str.len = es->p - t->str.start;
|
||||
es->p++; // Skip closing quote.
|
||||
return t;
|
||||
}
|
||||
es->p++;
|
||||
}
|
||||
/* If we reach here, string was not terminated. */
|
||||
exprTokenRelease(t);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Parse a tuple of the form [1, "foo", 42]. No nested tuples are
|
||||
 * supported. This type is mostly useful with the "in"
|
||||
* operator. */
|
||||
exprtoken *exprParseTuple(exprstate *es) {
|
||||
exprtoken *t = exprNewToken(EXPR_TOKEN_TUPLE);
|
||||
t->tuple.ele = NULL;
|
||||
t->tuple.len = 0;
|
||||
es->p++; /* Skip opening '['. */
|
||||
|
||||
size_t allocated = 0;
|
||||
while(1) {
|
||||
exprConsumeSpaces(es);
|
||||
|
||||
/* Check for empty tuple or end. */
|
||||
if (es->p[0] == ']') {
|
||||
es->p++;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Grow tuple array if needed. */
|
||||
if (t->tuple.len == allocated) {
|
||||
size_t newsize = allocated == 0 ? 4 : allocated * 2;
|
||||
exprtoken **newele = RedisModule_Realloc(t->tuple.ele,
|
||||
sizeof(exprtoken*) * newsize);
|
||||
t->tuple.ele = newele;
|
||||
allocated = newsize;
|
||||
}
|
||||
|
||||
/* Parse tuple element. */
|
||||
exprtoken *ele = NULL;
|
||||
if (isdigit(es->p[0]) || es->p[0] == '-') {
|
||||
ele = exprParseNumber(es);
|
||||
} else if (es->p[0] == '"' || es->p[0] == '\'') {
|
||||
ele = exprParseString(es);
|
||||
} else {
|
||||
exprTokenRelease(t);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Error parsing number/string? */
|
||||
if (ele == NULL) {
|
||||
exprTokenRelease(t);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Store element if no error was detected. */
|
||||
t->tuple.ele[t->tuple.len] = ele;
|
||||
t->tuple.len++;
|
||||
|
||||
/* Check for next element. */
|
||||
exprConsumeSpaces(es);
|
||||
if (es->p[0] == ']') {
|
||||
es->p++;
|
||||
break;
|
||||
}
|
||||
if (es->p[0] != ',') {
|
||||
exprTokenRelease(t);
|
||||
return NULL;
|
||||
}
|
||||
es->p++; /* Skip comma. */
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
/* Deallocate the object returned by exprCompile(). */
|
||||
void exprFree(exprstate *es) {
|
||||
if (es == NULL) return;
|
||||
|
||||
/* Free the original expression string. */
|
||||
if (es->expr) RedisModule_Free(es->expr);
|
||||
|
||||
/* Free all stacks. */
|
||||
exprStackFree(&es->values_stack);
|
||||
exprStackFree(&es->ops_stack);
|
||||
exprStackFree(&es->tokens);
|
||||
exprStackFree(&es->program);
|
||||
|
||||
/* Free the state object itself. */
|
||||
RedisModule_Free(es);
|
||||
}
|
||||
|
||||
/* Split the provided expression into a stack of tokens. Returns
|
||||
* 0 on success, 1 on error. */
|
||||
int exprTokenize(exprstate *es, int *errpos) {
|
||||
/* Main parsing loop. */
|
||||
while(1) {
|
||||
exprConsumeSpaces(es);
|
||||
|
||||
/* Set a flag to decide whether a '-' should be treated as part of
|
||||
 * a number or as an operator. */
|
||||
int minus_is_number = 0; // By default it is an operator.
|
||||
|
||||
exprtoken *last = exprStackPeek(&es->tokens);
|
||||
if (last == NULL) {
|
||||
/* If we are at the start of an expression, the minus is
|
||||
* considered a number. */
|
||||
minus_is_number = 1;
|
||||
} else if (last->token_type == EXPR_TOKEN_OP &&
|
||||
last->opcode != EXPR_OP_CPAREN)
|
||||
{
|
||||
/* Also, if the previous token was an operator, the minus
|
||||
* is considered a number, unless the previous operator is
|
||||
 * a closing paren. In that case we have something like (...) - 5,
|
||||
 * and we want to emit an operator. */
|
||||
minus_is_number = 1;
|
||||
}
|
||||
|
||||
/* Parse based on the current character. */
|
||||
exprtoken *current = NULL;
|
||||
if (*es->p == '\0') {
|
||||
current = exprNewToken(EXPR_TOKEN_EOF);
|
||||
} else if (isdigit(*es->p) ||
|
||||
(minus_is_number && *es->p == '-' && isdigit(es->p[1])))
|
||||
{
|
||||
current = exprParseNumber(es);
|
||||
} else if (*es->p == '"' || *es->p == '\'') {
|
||||
current = exprParseString(es);
|
||||
} else if (*es->p == '.' && is_selector_char(es->p[1])) {
|
||||
current = exprParseSelector(es);
|
||||
} else if (*es->p == '[') {
|
||||
current = exprParseTuple(es);
|
||||
} else if (isalpha(*es->p) || strchr(EXPR_OP_SPECIALCHARS, *es->p)) {
|
||||
current = exprParseOperatorOrLiteral(es);
|
||||
}
|
||||
|
||||
if (current == NULL) {
|
||||
if (errpos) *errpos = es->p - es->expr;
|
||||
return 1; // Syntax Error.
|
||||
}
|
||||
|
||||
/* Push the current token to tokens stack. */
|
||||
exprStackPush(&es->tokens, current);
|
||||
if (current->token_type == EXPR_TOKEN_EOF) break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Helper function to get operator precedence from the operator table. */
|
||||
int exprGetOpPrecedence(int opcode) {
|
||||
for (int i = 0; ExprOptable[i].opname != NULL; i++) {
|
||||
if (ExprOptable[i].opcode == opcode)
|
||||
return ExprOptable[i].precedence;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Helper function to get operator arity from the operator table. */
|
||||
int exprGetOpArity(int opcode) {
|
||||
for (int i = 0; ExprOptable[i].opname != NULL; i++) {
|
||||
if (ExprOptable[i].opcode == opcode)
|
||||
return ExprOptable[i].arity;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Process an operator during compilation. Returns 0 on success, 1 on error.
|
||||
* This function will retain a reference of the operator 'op' in case it
|
||||
* is pushed on the operators stack. */
|
||||
int exprProcessOperator(exprstate *es, exprtoken *op, int *stack_items, int *errpos) {
|
||||
if (op->opcode == EXPR_OP_OPAREN) {
|
||||
// This is just a marker for us. Do nothing.
|
||||
exprStackPush(&es->ops_stack, op);
|
||||
exprTokenRetain(op);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (op->opcode == EXPR_OP_CPAREN) {
|
||||
/* Process operators until we find the matching opening parenthesis. */
|
||||
while (1) {
|
||||
exprtoken *top_op = exprStackPop(&es->ops_stack);
|
||||
if (top_op == NULL) {
|
||||
if (errpos) *errpos = op->offset;
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (top_op->opcode == EXPR_OP_OPAREN) {
|
||||
/* Open parenthesis found. Our work is finished. */
|
||||
exprTokenRelease(top_op);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int arity = exprGetOpArity(top_op->opcode);
|
||||
if (*stack_items < arity) {
|
||||
if (errpos) *errpos = top_op->offset; // Read before releasing.
|
||||
exprTokenRelease(top_op);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Move the operator on the program stack. */
|
||||
exprStackPush(&es->program, top_op);
|
||||
*stack_items = *stack_items - arity + 1;
|
||||
}
|
||||
}
|
||||
|
||||
int curr_prec = exprGetOpPrecedence(op->opcode);
|
||||
|
||||
/* Process operators with higher or equal precedence. */
|
||||
while (1) {
|
||||
exprtoken *top_op = exprStackPeek(&es->ops_stack);
|
||||
if (top_op == NULL || top_op->opcode == EXPR_OP_OPAREN) break;
|
||||
|
||||
int top_prec = exprGetOpPrecedence(top_op->opcode);
|
||||
if (top_prec < curr_prec) break;
|
||||
/* Special case for **: only pop if precedence is strictly higher
|
||||
* so that the operator is right associative, that is:
|
||||
* 2 ** 3 ** 2 is evaluated as 2 ** (3 ** 2) == 512 instead
|
||||
* of (2 ** 3) ** 2 == 64. */
|
||||
if (op->opcode == EXPR_OP_POW && top_prec <= curr_prec) break;
|
||||
|
||||
/* Pop and add to program. */
|
||||
top_op = exprStackPop(&es->ops_stack);
|
||||
int arity = exprGetOpArity(top_op->opcode);
|
||||
if (*stack_items < arity) {
|
||||
if (errpos) *errpos = top_op->offset; // Read before releasing.
|
||||
exprTokenRelease(top_op);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Move to the program stack. */
|
||||
exprStackPush(&es->program, top_op);
|
||||
*stack_items = *stack_items - arity + 1;
|
||||
}
|
||||
|
||||
/* Push current operator. */
|
||||
exprStackPush(&es->ops_stack, op);
|
||||
exprTokenRetain(op);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Compile the expression into a set of push-value and exec-operator
|
||||
 * that exprRun() can execute. The function returns an exprstate object
|
||||
 * that can be used for execution of the program. On error, NULL
|
||||
 * is returned, and optionally the position of the error in the
|
||||
* expression is returned by reference. */
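|
||||
/* For example, compiling "1+2*3 > 5" should produce the program
|
||||
 * (in the notation of exprPrintToken() below):
|
||||
 *
|
||||
 *   NUM:1 NUM:2 NUM:3 OP:* OP:+ NUM:5 OP:>
|
||||
 *
|
||||
 * since '*' binds tighter than '+', which binds tighter than '>'. */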
|
||||
exprstate *exprCompile(char *expr, int *errpos) {
|
||||
/* Initialize expression state. */
|
||||
exprstate *es = RedisModule_Alloc(sizeof(exprstate));
|
||||
es->expr = RedisModule_Strdup(expr);
|
||||
es->p = es->expr;
|
||||
|
||||
/* Initialize all stacks. */
|
||||
exprStackInit(&es->values_stack);
|
||||
exprStackInit(&es->ops_stack);
|
||||
exprStackInit(&es->tokens);
|
||||
exprStackInit(&es->program);
|
||||
|
||||
/* Tokenization. */
|
||||
if (exprTokenize(es, errpos)) {
|
||||
exprFree(es);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Compile the expression into a sequence of operations. */
|
||||
int stack_items = 0; // Track # of items that would be on the stack
|
||||
// during execution. This way we can detect arity
|
||||
// issues at compile time.
|
||||
|
||||
/* Process each token. */
|
||||
for (int i = 0; i < es->tokens.numitems; i++) {
|
||||
exprtoken *token = es->tokens.items[i];
|
||||
|
||||
if (token->token_type == EXPR_TOKEN_EOF) break;
|
||||
|
||||
/* Handle values (numbers, strings, selectors). */
|
||||
if (token->token_type == EXPR_TOKEN_NUM ||
|
||||
token->token_type == EXPR_TOKEN_STR ||
|
||||
token->token_type == EXPR_TOKEN_TUPLE ||
|
||||
token->token_type == EXPR_TOKEN_SELECTOR)
|
||||
{
|
||||
exprStackPush(&es->program, token);
|
||||
exprTokenRetain(token);
|
||||
stack_items++;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Handle operators. */
|
||||
if (token->token_type == EXPR_TOKEN_OP) {
|
||||
if (exprProcessOperator(es, token, &stack_items, errpos)) {
|
||||
exprFree(es);
|
||||
return NULL;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* Process remaining operators on the stack. */
|
||||
while (es->ops_stack.numitems > 0) {
|
||||
exprtoken *op = exprStackPop(&es->ops_stack);
|
||||
if (op->opcode == EXPR_OP_OPAREN) {
|
||||
if (errpos) *errpos = op->offset;
|
||||
exprTokenRelease(op);
|
||||
exprFree(es);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int arity = exprGetOpArity(op->opcode);
|
||||
if (stack_items < arity) {
|
||||
if (errpos) *errpos = op->offset;
|
||||
exprTokenRelease(op);
|
||||
exprFree(es);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
exprStackPush(&es->program, op);
|
||||
stack_items = stack_items - arity + 1;
|
||||
}
|
||||
|
||||
/* Verify that exactly one value would remain on the stack after
|
||||
* execution. We could also check that such value is a number, but this
|
||||
 * would make the code more complex without much gain. */
|
||||
if (stack_items != 1) {
|
||||
if (errpos) {
|
||||
/* Point to the last token's offset for error reporting. */
|
||||
exprtoken *last = es->tokens.items[es->tokens.numitems - 1];
|
||||
*errpos = last->offset;
|
||||
}
|
||||
exprFree(es);
|
||||
return NULL;
|
||||
}
|
||||
return es;
|
||||
}
|
||||
|
||||
/* ============================ Expression execution ======================== */
|
||||
|
||||
/* Convert a token to its numeric value. For strings we attempt to parse them
|
||||
* as numbers, returning 0 if conversion fails. */
|
||||
double exprTokenToNum(exprtoken *t) {
|
||||
char buf[256];
|
||||
if (t->token_type == EXPR_TOKEN_NUM) {
|
||||
return t->num;
|
||||
} else if (t->token_type == EXPR_TOKEN_STR && t->str.len < sizeof(buf)) {
|
||||
memcpy(buf, t->str.start, t->str.len);
|
||||
buf[t->str.len] = '\0';
|
||||
char *endptr;
|
||||
double val = strtod(buf, &endptr);
|
||||
return *endptr == '\0' ? val : 0;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* Convert object to true/false (0 or 1) */
|
||||
double exprTokenToBool(exprtoken *t) {
|
||||
if (t->token_type == EXPR_TOKEN_NUM) {
|
||||
return t->num != 0;
|
||||
} else if (t->token_type == EXPR_TOKEN_STR && t->str.len == 0) {
|
||||
return 0; // Empty strings are false, like in JavaScript.
|
||||
} else if (t->token_type == EXPR_TOKEN_NULL) {
|
||||
return 0; // Null is surely more false than true...
|
||||
} else {
|
||||
return 1; // Every non-numerical type is true.
|
||||
}
|
||||
}
|
||||
|
||||
/* Compare two tokens. Returns true if they are equal. */
|
||||
int exprTokensEqual(exprtoken *a, exprtoken *b) {
|
||||
// If both are strings, do string comparison.
|
||||
if (a->token_type == EXPR_TOKEN_STR && b->token_type == EXPR_TOKEN_STR) {
|
||||
return a->str.len == b->str.len &&
|
||||
memcmp(a->str.start, b->str.start, a->str.len) == 0;
|
||||
}
|
||||
|
||||
// If both are numbers, do numeric comparison.
|
||||
if (a->token_type == EXPR_TOKEN_NUM && b->token_type == EXPR_TOKEN_NUM) {
|
||||
return a->num == b->num;
|
||||
}
|
||||
|
||||
/* If one of the two is null, the expression is true only if
|
||||
* both are null. */
|
||||
if (a->token_type == EXPR_TOKEN_NULL || b->token_type == EXPR_TOKEN_NULL) {
|
||||
return a->token_type == b->token_type;
|
||||
}
|
||||
|
||||
// Mixed types - convert to numbers and compare.
|
||||
return exprTokenToNum(a) == exprTokenToNum(b);
|
||||
}
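|
||||
|
||||
/* Note: as a consequence of the mixed-type fallback above, a
 * comparison like "7" == 7 evaluates to true. */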
|
||||
|
||||
#include "fastjson.c" // JSON parser implementation used by exprRun().
|
||||
|
||||
/* Execute the compiled expression program. Returns 1 if the final stack value
|
||||
* evaluates to true, 0 otherwise. Also returns 0 if any selector callback
|
||||
* fails. */
|
||||
int exprRun(exprstate *es, char *json, size_t json_len) {
|
||||
exprStackReset(&es->values_stack);
|
||||
|
||||
// Execute each instruction in the program.
|
||||
for (int i = 0; i < es->program.numitems; i++) {
|
||||
exprtoken *t = es->program.items[i];
|
||||
|
||||
// Handle selectors by calling the callback.
|
||||
if (t->token_type == EXPR_TOKEN_SELECTOR) {
|
||||
exprtoken *obj = NULL;
|
||||
if (t->str.len > 0)
|
||||
obj = jsonExtractField(json,json_len,t->str.start,t->str.len);
|
||||
|
||||
// Selector not found or JSON object not convertible to
|
||||
// expression tokens. Evaluate the expression to false.
|
||||
if (obj == NULL) return 0;
|
||||
exprStackPush(&es->values_stack, obj);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Push non-operator values directly onto the stack.
|
||||
if (t->token_type != EXPR_TOKEN_OP) {
|
||||
exprStackPush(&es->values_stack, t);
|
||||
exprTokenRetain(t);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Handle operators.
|
||||
exprtoken *result = exprNewToken(EXPR_TOKEN_NUM);
|
||||
|
||||
// Pop operands - we know we have enough from compile-time checks.
|
||||
exprtoken *b = exprStackPop(&es->values_stack);
|
||||
exprtoken *a = NULL;
|
||||
if (exprGetOpArity(t->opcode) == 2) {
|
||||
a = exprStackPop(&es->values_stack);
|
||||
}
|
||||
|
||||
switch(t->opcode) {
|
||||
case EXPR_OP_NOT:
|
||||
result->num = exprTokenToBool(b) == 0 ? 1 : 0;
|
||||
break;
|
||||
case EXPR_OP_POW: {
|
||||
double base = exprTokenToNum(a);
|
||||
double exp = exprTokenToNum(b);
|
||||
result->num = pow(base, exp);
|
||||
break;
|
||||
}
|
||||
case EXPR_OP_MULT:
|
||||
result->num = exprTokenToNum(a) * exprTokenToNum(b);
|
||||
break;
|
||||
case EXPR_OP_DIV:
|
||||
result->num = exprTokenToNum(a) / exprTokenToNum(b);
|
||||
break;
|
||||
case EXPR_OP_MOD: {
|
||||
double va = exprTokenToNum(a);
|
||||
double vb = exprTokenToNum(b);
|
||||
result->num = fmod(va, vb);
|
||||
break;
|
||||
}
|
||||
case EXPR_OP_SUM:
|
||||
result->num = exprTokenToNum(a) + exprTokenToNum(b);
|
||||
break;
|
||||
case EXPR_OP_DIFF:
|
||||
result->num = exprTokenToNum(a) - exprTokenToNum(b);
|
||||
break;
|
||||
case EXPR_OP_GT:
|
||||
result->num = exprTokenToNum(a) > exprTokenToNum(b) ? 1 : 0;
|
||||
break;
|
||||
case EXPR_OP_GTE:
|
||||
result->num = exprTokenToNum(a) >= exprTokenToNum(b) ? 1 : 0;
|
||||
break;
|
||||
case EXPR_OP_LT:
|
||||
result->num = exprTokenToNum(a) < exprTokenToNum(b) ? 1 : 0;
|
||||
break;
|
||||
case EXPR_OP_LTE:
|
||||
result->num = exprTokenToNum(a) <= exprTokenToNum(b) ? 1 : 0;
|
||||
break;
|
||||
case EXPR_OP_EQ:
|
||||
result->num = exprTokensEqual(a, b) ? 1 : 0;
|
||||
break;
|
||||
case EXPR_OP_NEQ:
|
||||
result->num = !exprTokensEqual(a, b) ? 1 : 0;
|
||||
break;
|
||||
case EXPR_OP_IN: {
|
||||
// For 'in' operator, b must be a tuple.
|
||||
result->num = 0; // Default to false.
|
||||
if (b->token_type == EXPR_TOKEN_TUPLE) {
|
||||
for (size_t j = 0; j < b->tuple.len; j++) {
|
||||
if (exprTokensEqual(a, b->tuple.ele[j])) {
|
||||
result->num = 1; // Found a match.
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
case EXPR_OP_AND:
|
||||
result->num =
|
||||
exprTokenToBool(a) != 0 && exprTokenToBool(b) != 0 ? 1 : 0;
|
||||
break;
|
||||
case EXPR_OP_OR:
|
||||
result->num =
|
||||
exprTokenToBool(a) != 0 || exprTokenToBool(b) != 0 ? 1 : 0;
|
||||
break;
|
||||
default:
|
||||
// Do nothing: we don't want runtime errors.
|
||||
break;
|
||||
}
|
||||
|
||||
// Free operands and push result.
|
||||
if (a) exprTokenRelease(a);
|
||||
exprTokenRelease(b);
|
||||
exprStackPush(&es->values_stack, result);
|
||||
}
|
||||
|
||||
// Get final result from stack.
|
||||
exprtoken *final = exprStackPop(&es->values_stack);
|
||||
if (final == NULL) return 0;
|
||||
|
||||
// Convert result to boolean.
|
||||
int retval = exprTokenToBool(final);
|
||||
exprTokenRelease(final);
|
||||
return retval;
|
||||
}
|
||||
|
||||
/* ============================ Simple test main ============================ */
|
||||
|
||||
#ifdef TEST_MAIN
|
||||
#include "fastjson_test.c"
|
||||
|
||||
void exprPrintToken(exprtoken *t) {
|
||||
switch(t->token_type) {
|
||||
case EXPR_TOKEN_EOF:
|
||||
printf("EOF");
|
||||
break;
|
||||
case EXPR_TOKEN_NUM:
|
||||
printf("NUM:%g", t->num);
|
||||
break;
|
||||
case EXPR_TOKEN_STR:
|
||||
printf("STR:\"%.*s\"", (int)t->str.len, t->str.start);
|
||||
break;
|
||||
case EXPR_TOKEN_SELECTOR:
|
||||
printf("SEL:%.*s", (int)t->str.len, t->str.start);
|
||||
break;
|
||||
case EXPR_TOKEN_OP:
|
||||
printf("OP:");
|
||||
for (int i = 0; ExprOptable[i].opname != NULL; i++) {
|
||||
if (ExprOptable[i].opcode == t->opcode) {
|
||||
printf("%s", ExprOptable[i].opname);
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
printf("UNKNOWN");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void exprPrintStack(exprstack *stack, const char *name) {
|
||||
printf("%s (%d items):", name, stack->numitems);
|
||||
for (int j = 0; j < stack->numitems; j++) {
|
||||
printf(" ");
|
||||
exprPrintToken(stack->items[j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
/* Check for JSON parser test mode. */
|
||||
if (argc >= 2 && strcmp(argv[1], "--test-json-parser") == 0) {
|
||||
run_fastjson_test();
|
||||
return 0;
|
||||
}
|
||||
|
||||
char *testexpr = "(5+2)*3 and .year > 1980 and 'foo' == 'foo'";
|
||||
char *testjson = "{\"year\": 1984, \"name\": \"The Matrix\"}";
|
||||
if (argc >= 2) testexpr = argv[1];
|
||||
if (argc >= 3) testjson = argv[2];
|
||||
|
||||
printf("Compiling expression: %s\n", testexpr);
|
||||
|
||||
int errpos = 0;
|
||||
exprstate *es = exprCompile(testexpr,&errpos);
|
||||
if (es == NULL) {
|
||||
printf("Compilation failed near \"...%s\"\n", testexpr+errpos);
|
||||
return 1;
|
||||
}
|
||||
|
||||
exprPrintStack(&es->tokens, "Tokens");
|
||||
exprPrintStack(&es->program, "Program");
|
||||
printf("Running against object: %s\n", testjson);
|
||||
int result = exprRun(es,testjson,strlen(testjson));
|
||||
printf("Result1: %s\n", result ? "True" : "False");
|
||||
result = exprRun(es,testjson,strlen(testjson));
|
||||
printf("Result2: %s\n", result ? "True" : "False");
|
||||
|
||||
exprFree(es);
|
||||
return 0;
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,441 @@
|
|||
/* Ultra‑lightweight top‑level JSON field extractor.
|
||||
* Return the element directly as an expr.c token.
|
||||
* This code is directly included inside expr.c.
|
||||
*
|
||||
* Copyright (c) 2025-Present, Redis Ltd.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Licensed under your choice of the Redis Source Available License 2.0
|
||||
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
|
||||
*
|
||||
* Originally authored by: Salvatore Sanfilippo.
|
||||
*
|
||||
* ------------------------------------------------------------------
|
||||
*
|
||||
* DESIGN GOALS:
|
||||
*
|
||||
* 1. Zero heap allocations while seeking the requested key.
|
||||
* 2. A single parse (and therefore a single allocation, if needed)
|
||||
* when the key finally matches.
|
||||
* 3. Same subset‑of‑JSON coverage needed by expr.c:
|
||||
* - Strings (escapes: \" \\ \n \r \t).
|
||||
* - Numbers (double).
|
||||
* - Booleans.
|
||||
* - Null.
|
||||
* - Flat arrays of the above primitives.
|
||||
*
|
||||
* Any other value (nested object, unicode escape, etc.) returns NULL.
|
||||
 * It should be easy to extend if, in the future, we want more
|
||||
 * for the FILTER option of VSIM.
|
||||
* 4. No global state, so this file can be #included directly in expr.c.
|
||||
*
|
||||
* The only API expr.c uses directly is:
|
||||
*
|
||||
* exprtoken *jsonExtractField(const char *json, size_t json_len,
|
||||
* const char *field, size_t field_len);
|
||||
* ------------------------------------------------------------------ */
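|
||||
|
||||
/* Example (a sketch): with json = "{\"year\": 1984}", the call
|
||||
 *
|
||||
 *   exprtoken *t = jsonExtractField(json, strlen(json), "year", 4);
|
||||
 *
|
||||
 * returns an EXPR_TOKEN_NUM token with t->num == 1984, which the
|
||||
 * caller releases with exprTokenRelease(). */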
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
|
||||
// Forward declarations.
|
||||
static int jsonSkipValue(const char **p, const char *end);
|
||||
static exprtoken *jsonParseValueToken(const char **p, const char *end);
|
||||
|
||||
/* Similar to ctype.h isdigit() but covers the whole JSON number charset,
|
||||
* including exp form. */
|
||||
static int jsonIsNumberChar(int c) {
|
||||
return isdigit(c) || c=='-' || c=='+' || c=='.' || c=='e' || c=='E';
|
||||
}
|
||||
|
||||
/* ========================== Fast skipping of JSON =========================
|
||||
* The helpers here are designed to skip values without performing any
|
||||
* allocation. This way, for the use case of this JSON parser, we are able
|
||||
* to easily (and with good speed) skip fields and values we are not
|
||||
* interested in. Then, later in the code, when we find the field we want
|
||||
* to obtain, we finally call the functions that turn a given JSON value
|
||||
 * associated with a field into one of our expression tokens.
|
||||
* ========================================================================== */
|
||||
|
||||
/* Advance *p consuming all the spaces. */
|
||||
static inline void jsonSkipWhiteSpaces(const char **p, const char *end) {
|
||||
while (*p < end && isspace((unsigned char)**p)) (*p)++;
|
||||
}
|
||||
|
||||
/* Advance *p past a JSON string. Returns 1 on success, 0 on error. */
|
||||
static int jsonSkipString(const char **p, const char *end) {
|
||||
if (*p >= end || **p != '"') return 0;
|
||||
(*p)++; /* Skip opening quote. */
|
||||
while (*p < end) {
|
||||
if (**p == '\\') {
|
||||
(*p) += 2;
|
||||
continue;
|
||||
}
|
||||
if (**p == '"') {
|
||||
(*p)++; /* Skip closing quote. */
|
||||
return 1;
|
||||
}
|
||||
(*p)++;
|
||||
}
|
||||
return 0; /* unterminated */
|
||||
}
|
||||
|
||||
/* Skip an array or object generically using depth counter.
|
||||
 * Opener and closer tell the function how the aggregated
|
||||
* data type starts/stops, basically [] or {}. */
|
||||
static int jsonSkipBracketed(const char **p, const char *end,
|
||||
char opener, char closer) {
|
||||
int depth = 1;
|
||||
(*p)++; /* Skip opener. */
|
||||
|
||||
/* Loop until we reach the end of the input or find the matching
|
||||
* closer (depth becomes 0). */
|
||||
while (*p < end && depth > 0) {
|
||||
char c = **p;
|
||||
|
||||
if (c == '"') {
|
||||
// Found a string, delegate skipping to jsonSkipString().
|
||||
if (!jsonSkipString(p, end)) {
|
||||
return 0; // String skipping failed (e.g., unterminated)
|
||||
}
|
||||
/* jsonSkipString() advances *p past the closing quote.
|
||||
* Continue the loop to process the character *after* the string. */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* If it's not a string, check if it affects the depth for the
|
||||
* specific brackets we are currently tracking. */
|
||||
if (c == opener) {
|
||||
depth++;
|
||||
} else if (c == closer) {
|
||||
depth--;
|
||||
}
|
||||
|
||||
/* Always advance the pointer for any non-string character.
|
||||
* This handles commas, colons, whitespace, numbers, literals,
|
||||
* and even nested brackets of a *different* type than the
|
||||
* one we are currently skipping (e.g. skipping a { inside []). */
|
||||
(*p)++;
|
||||
}
|
||||
|
||||
/* Return 1 (true) if we successfully found the matching closer,
|
||||
* otherwise there is a parse error and we return 0. */
|
||||
return depth == 0;
|
||||
}
|
||||
|
||||
/* Skip a single JSON literal (true, null, ...) starting at *p.
|
||||
* Returns 1 on success, 0 on failure. */
|
||||
static int jsonSkipLiteral(const char **p, const char *end, const char *lit) {
|
||||
size_t l = strlen(lit);
|
||||
if (*p + l > end) return 0;
|
||||
if (strncmp(*p, lit, l) == 0) { *p += l; return 1; }
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Skip number, don't check that number format is correct, just consume
|
||||
 * number-like characters.
|
||||
*
|
||||
* Note: More robust number skipping might check validity,
|
||||
* but for skipping, just consuming plausible characters is enough. */
|
||||
static int jsonSkipNumber(const char **p, const char *end) {
|
||||
const char *num_start = *p;
|
||||
while (*p < end && jsonIsNumberChar(**p)) (*p)++;
|
||||
return *p > num_start; // Any progress made? Otherwise no number found.
|
||||
}
|
||||
|
||||
/* Skip any JSON value. 1 = success, 0 = error. */
|
||||
static int jsonSkipValue(const char **p, const char *end) {
|
||||
jsonSkipWhiteSpaces(p, end);
|
||||
if (*p >= end) return 0;
|
||||
switch (**p) {
|
||||
case '"': return jsonSkipString(p, end);
|
||||
case '{': return jsonSkipBracketed(p, end, '{', '}');
|
||||
case '[': return jsonSkipBracketed(p, end, '[', ']');
|
||||
case 't': return jsonSkipLiteral(p, end, "true");
|
||||
case 'f': return jsonSkipLiteral(p, end, "false");
|
||||
case 'n': return jsonSkipLiteral(p, end, "null");
|
||||
default: return jsonSkipNumber(p, end);
|
||||
}
|
||||
}
|
||||
|
||||
/* =========================== JSON to exprtoken ============================
|
||||
* The functions below convert a given json value to the equivalent
|
||||
* expression token structure.
|
||||
* ========================================================================== */
|
||||
|
||||
static exprtoken *jsonParseStringToken(const char **p, const char *end) {
|
||||
if (*p >= end || **p != '"') return NULL;
|
||||
const char *start = ++(*p);
|
||||
int esc = 0; size_t len = 0; int has_esc = 0;
|
||||
const char *q = *p;
|
||||
while (q < end) {
|
||||
if (esc) { esc = 0; q++; len++; has_esc = 1; continue; }
|
||||
if (*q == '\\') { esc = 1; q++; continue; }
|
||||
if (*q == '"') break;
|
||||
q++; len++;
|
||||
}
|
||||
if (q >= end || *q != '"') return NULL; // Unterminated string
|
||||
exprtoken *t = exprNewToken(EXPR_TOKEN_STR);
|
||||
|
||||
if (!has_esc) {
|
||||
// No escapes, we can point directly into the original JSON string.
|
||||
t->str.start = (char*)start; t->str.len = len; t->str.heapstr = NULL;
|
||||
} else {
|
||||
// Escapes present, need to allocate and copy/process escapes.
|
||||
char *dst = RedisModule_Alloc(len + 1);
|
||||
|
||||
t->str.start = t->str.heapstr = dst; t->str.len = len;
|
||||
const char *r = start; esc = 0;
|
||||
while (r < q) {
|
||||
if (esc) {
|
||||
switch (*r) {
|
||||
// Supported escapes from Goal 3.
|
||||
case 'n': *dst='\n'; break;
|
||||
case 'r': *dst='\r'; break;
|
||||
case 't': *dst='\t'; break;
|
||||
case '\\': *dst='\\'; break;
|
||||
case '"': *dst='\"'; break;
|
||||
// Escapes (like \uXXXX, \b, \f) are not supported for now,
|
||||
// we just copy them verbatim.
|
||||
default: *dst=*r; break;
|
||||
}
|
||||
dst++; esc = 0; r++; continue;
|
||||
}
|
||||
if (*r == '\\') { esc = 1; r++; continue; }
|
||||
*dst++ = *r++;
|
||||
}
|
||||
*dst = '\0'; // Null-terminate the allocated string.
|
||||
}
|
||||
*p = q + 1; // Advance the main pointer past the closing quote.
|
||||
return t;
|
||||
}
|
||||
|
||||
static exprtoken *jsonParseNumberToken(const char **p, const char *end) {
|
||||
// Use a buffer to extract the number literal for parsing with strtod().
|
||||
char buf[256]; int idx = 0;
|
||||
const char *start = *p; // For strtod partial failures check.
|
||||
|
||||
// Copy potential number characters to buffer.
|
||||
while (*p < end && idx < (int)sizeof(buf)-1 && jsonIsNumberChar(**p)) {
|
||||
buf[idx++] = **p;
|
||||
(*p)++;
|
||||
}
|
||||
buf[idx]='\0'; // Null-terminate buffer.
|
||||
|
||||
if (idx==0) return NULL; // No number characters found.
|
||||
|
||||
char *ep; // End pointer for strtod validation.
|
||||
double v = strtod(buf, &ep);
|
||||
|
||||
/* Check if strtod() consumed the entire buffer content.
|
||||
* If not, the number format was invalid. */
|
||||
if (*ep!='\0') {
|
||||
// strtod() failed; rewind p to the start and return NULL
|
||||
*p = start;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// If strtod() succeeded, create and return the token.
|
||||
exprtoken *t = exprNewToken(EXPR_TOKEN_NUM);
|
||||
t->num = v;
|
||||
return t;
|
||||
}
|
||||
|
||||
static exprtoken *jsonParseLiteralToken(const char **p, const char *end, const char *lit, int type, double num) {
|
||||
size_t l = strlen(lit);
|
||||
|
||||
// Ensure we don't read past 'end'.
|
||||
if ((*p + l) > end) return NULL;
|
||||
|
||||
if (strncmp(*p, lit, l) != 0) return NULL; // Literal doesn't match.
|
||||
|
||||
// Check that the character *after* the literal is a valid JSON delimiter
|
||||
// (whitespace, comma, closing bracket/brace, or end of input)
|
||||
// This prevents matching "trueblabla" as "true".
|
||||
if ((*p + l) < end) {
|
||||
char next_char = *(*p + l);
|
||||
if (!isspace((unsigned char)next_char) && next_char!=',' &&
|
||||
next_char!=']' && next_char!='}') {
|
||||
return NULL; // Invalid character following literal.
|
||||
}
|
||||
}
|
||||
|
||||
// Literal matched and is correctly terminated.
|
||||
*p += l;
|
||||
exprtoken *t = exprNewToken(type);
|
||||
t->num = num;
|
||||
return t;
|
||||
}
|
||||
|
||||
static exprtoken *jsonParseArrayToken(const char **p, const char *end) {
|
||||
if (*p >= end || **p != '[') return NULL;
|
||||
(*p)++; // Skip '['.
|
||||
jsonSkipWhiteSpaces(p,end);
|
||||
|
||||
exprtoken *t = exprNewToken(EXPR_TOKEN_TUPLE);
|
||||
t->tuple.len = 0; t->tuple.ele = NULL; size_t alloc = 0;
|
||||
|
||||
// Handle empty array [].
|
||||
if (*p < end && **p == ']') {
|
||||
(*p)++; // Skip ']'.
|
||||
return t;
|
||||
}
|
||||
|
||||
// Parse array elements.
|
||||
while (1) {
|
||||
exprtoken *ele = jsonParseValueToken(p,end);
|
||||
if (!ele) {
|
||||
exprTokenRelease(t); // Clean up partially built array token.
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Grow allocated space for elements if needed.
|
||||
if (t->tuple.len == alloc) {
|
||||
size_t newsize = alloc ? alloc * 2 : 4;
|
||||
// Check for potential overflow if newsize becomes huge.
|
||||
if (newsize < alloc) {
|
||||
exprTokenRelease(ele);
|
||||
exprTokenRelease(t);
|
||||
return NULL;
|
||||
}
|
||||
exprtoken **newele = RedisModule_Realloc(t->tuple.ele,
|
||||
sizeof(exprtoken*)*newsize);
|
||||
t->tuple.ele = newele;
|
||||
alloc = newsize;
|
||||
}
|
||||
t->tuple.ele[t->tuple.len++] = ele; // Add element.
|
||||
|
||||
jsonSkipWhiteSpaces(p,end);
|
||||
if (*p>=end) {
|
||||
// Unterminated array. Note that this check is crucial because
|
||||
// previous value parsed may seek 'p' to 'end'.
|
||||
exprTokenRelease(t);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Check for comma (more elements) or closing bracket.
|
||||
if (**p == ',') {
|
||||
(*p)++; // Skip ','
|
||||
jsonSkipWhiteSpaces(p,end); // Skip whitespace before next element
|
||||
continue; // Parse next element
|
||||
} else if (**p == ']') {
|
||||
(*p)++; // Skip ']'
|
||||
return t; // End of array
|
||||
} else {
|
||||
// Unexpected character (not ',' or ']')
|
||||
exprTokenRelease(t);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* Turn a JSON value into an expr token. */
|
||||
static exprtoken *jsonParseValueToken(const char **p, const char *end) {
|
||||
jsonSkipWhiteSpaces(p,end);
|
||||
if (*p >= end) return NULL;
|
||||
|
||||
switch (**p) {
|
||||
case '"': return jsonParseStringToken(p,end);
|
||||
case '[': return jsonParseArrayToken(p,end);
|
||||
case '{': return NULL; // No nested elements support for now.
|
||||
case 't': return jsonParseLiteralToken(p,end,"true",EXPR_TOKEN_NUM,1);
|
||||
case 'f': return jsonParseLiteralToken(p,end,"false",EXPR_TOKEN_NUM,0);
|
||||
case 'n': return jsonParseLiteralToken(p,end,"null",EXPR_TOKEN_NULL,0);
|
||||
default:
|
||||
// Check if it starts like a number.
|
||||
if (isdigit((unsigned char)**p) || **p=='-' || **p=='+') {
|
||||
return jsonParseNumberToken(p,end);
|
||||
}
|
||||
// Anything else is an unsupported type or malformed JSON.
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* ============================== Fast key seeking ========================== */
|
||||
|
||||
/* Finds the start of the value for a given field key within a JSON object.
|
||||
* Returns pointer to the first char of the value, or NULL if not found/error.
|
||||
* This function does not perform any allocation and is optimized to seek
|
||||
 * the specified *top-level* field as fast as possible. */
|
||||
static const char *jsonSeekField(const char *json, const char *end,
|
||||
const char *field, size_t flen) {
|
||||
const char *p = json;
|
||||
jsonSkipWhiteSpaces(&p,end);
|
||||
if (p >= end || *p != '{') return NULL; // Must start with '{'.
|
||||
p++; // skip '{'.
|
||||
|
||||
while (1) {
|
||||
jsonSkipWhiteSpaces(&p,end);
|
||||
if (p >= end) return NULL; // Reached end within object.
|
||||
|
||||
if (*p == '}') return NULL; // End of object, field not found.
|
||||
|
||||
// Expecting a key (string).
|
||||
if (*p != '"') return NULL; // Key must be a string.
|
||||
|
||||
// --- Key Matching using jsonSkipString ---
|
||||
const char *key_start = p + 1; // Start of key content.
|
||||
const char *key_end_p = p; // Will later contain the end.
|
||||
|
||||
// Use jsonSkipString() to find the end.
|
||||
if (!jsonSkipString(&key_end_p, end)) {
|
||||
// Unterminated / invalid key string.
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Calculate the length of the key's content.
|
||||
size_t klen = (key_end_p - 1) - key_start;
|
||||
|
||||
/* Perform the comparison using the raw key content.
|
||||
* WARNING: This uses memcmp(), so we don't handle escaped chars
|
||||
* within the key matching against unescaped chars in 'field'. */
|
||||
int match = klen == flen && !memcmp(key_start, field, flen);
|
||||
|
||||
// Update the main pointer 'p' to be after the key string.
|
||||
p = key_end_p;
|
||||
|
||||
// Now we expect to find a ":" followed by a value.
|
||||
jsonSkipWhiteSpaces(&p,end);
|
||||
if (p>=end || *p!=':') return NULL; // Expect ':' after key
|
||||
p++; // Skip ':'.
|
||||
|
||||
// Seek value.
|
||||
jsonSkipWhiteSpaces(&p,end);
|
||||
if (p>=end) return NULL; // Expect value after ':'
|
||||
|
||||
if (match) {
|
||||
// Found the matching key, p now points to the start of the value.
|
||||
return p;
|
||||
} else {
|
||||
// Key didn't match, skip the corresponding value.
|
||||
if (!jsonSkipValue(&p,end)) return NULL; // Syntax error.
|
||||
}
|
||||
|
||||
|
||||
// Look for comma or a closing brace.
|
||||
jsonSkipWhiteSpaces(&p,end);
|
||||
if (p>=end) return NULL; // Reached end after value.
|
||||
|
||||
if (*p == ',') {
|
||||
p++; // Skip comma, continue loop to find next key.
|
||||
continue;
|
||||
} else if (*p == '}') {
|
||||
return NULL; // Reached end of object, field not found.
|
||||
}
|
||||
return NULL; // Malformed JSON (unexpected char after value).
|
||||
}
|
||||
}
|
||||
|
||||
/* This is the only real API that this file conceptually exports (it is
|
||||
* inlined, actually). */
|
||||
exprtoken *jsonExtractField(const char *json, size_t json_len,
|
||||
const char *field, size_t field_len)
|
||||
{
|
||||
const char *end = json + json_len;
|
||||
const char *valptr = jsonSeekField(json,end,field,field_len);
|
||||
if (!valptr) return NULL;
|
||||
|
||||
/* Key found, valptr points to the start of the value.
|
||||
* Convert it into an expression token object. */
|
||||
return jsonParseValueToken(&valptr,end);
|
||||
}
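
/* Usage sketch (illustrative only; the exprtoken type and
 * exprTokenRelease() are defined elsewhere in this codebase):
 *
 *   const char *doc = "{\"age\": 25, \"name\": \"Alice\"}";
 *   exprtoken *t = jsonExtractField(doc, strlen(doc), "age", 3);
 *   if (t != NULL) {
 *       // Field found: 't' is a numeric token holding 25.
 *       exprTokenRelease(t);
 *   }
 */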

@ -0,0 +1,406 @@

/* fastjson_test.c - Stress test for fastjson.c
 *
 * This performs boundary and corruption tests to ensure
 * the JSON parser handles edge cases without accessing
 * memory outside the bounds of the input.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <signal.h>
#include <time.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <fcntl.h>
#include <errno.h>
#include <setjmp.h>

/* Page size constant - typically 4096 bytes, or 16k on Apple Silicon.
 * We use 16k so that it will work on both (but not with Linux huge pages). */
#define PAGE_SIZE (4096*4)
#define MAX_JSON_SIZE (PAGE_SIZE - 128) /* Keep some margin */
#define MAX_FIELD_SIZE 64
#define NUM_TEST_ITERATIONS 100000
#define NUM_CORRUPTION_TESTS 10000
#define NUM_BOUNDARY_TESTS 10000

/* Test state tracking */
static char *safe_page = NULL;     /* Start of readable/writable page */
static char *unsafe_page = NULL;   /* Start of inaccessible guard page */
static int boundary_violation = 0; /* Flag for boundary violations */
static jmp_buf jmpbuf;             /* For signal handling */
static int tests_passed = 0;
static int tests_failed = 0;
static int corruptions_passed = 0;
static int boundary_tests_passed = 0;

/* Test metadata for tracking */
typedef struct {
    char *json;
    size_t json_len;
    char field[MAX_FIELD_SIZE];
    size_t field_len;
    int expected_result;
} test_case_t;

/* Forward declarations for test JSON generation */
char *generate_random_json(size_t *len, char *field, size_t *field_len, int *has_field);
void corrupt_json(char *json, size_t len);
void setup_test_memory(void);
void cleanup_test_memory(void);
void run_normal_tests(void);
void run_corruption_tests(void);
void run_boundary_tests(void);
void print_test_summary(void);

/* Signal handler for segmentation violations */
static void sigsegv_handler(int sig) {
    boundary_violation = 1;
    printf("Boundary violation detected! Caught signal %d\n", sig);
    longjmp(jmpbuf, 1);
}

/* Wrapper for jsonExtractField to check for boundary violations */
exprtoken *safe_extract_field(const char *json, size_t json_len,
                              const char *field, size_t field_len) {
    boundary_violation = 0;

    if (setjmp(jmpbuf) == 0) {
        return jsonExtractField(json, json_len, field, field_len);
    } else {
        return NULL; /* Return NULL if boundary violation occurred */
    }
}

/* Setup two adjacent memory pages - one readable/writable, one inaccessible */
void setup_test_memory(void) {
    /* Request a page of memory, with specific alignment. We rely on the
     * fact that hopefully the page after that will cause a segfault if
     * accessed. */
    void *region = mmap(NULL, PAGE_SIZE,
                        PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS,
                        -1, 0);

    if (region == MAP_FAILED) {
        perror("mmap failed");
        exit(EXIT_FAILURE);
    }

    safe_page = (char*)region;
    unsafe_page = safe_page + PAGE_SIZE;
    // Uncomment to make sure it crashes :D
    // printf("%d\n", unsafe_page[5]);

    /* Set up signal handlers for memory access violations */
    struct sigaction sa;
    sa.sa_handler = sigsegv_handler;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = 0;

    sigaction(SIGSEGV, &sa, NULL);
    sigaction(SIGBUS, &sa, NULL);
}
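
/* Note: a stricter variant of the setup above (not used here) would map
 * two pages and explicitly revoke access to the second one, making the
 * fault on overflow guaranteed rather than just likely:
 *
 *   void *region = mmap(NULL, PAGE_SIZE*2, PROT_READ | PROT_WRITE,
 *                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
 *   mprotect((char*)region + PAGE_SIZE, PAGE_SIZE, PROT_NONE);
 */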

void cleanup_test_memory(void) {
    if (safe_page != NULL) {
        munmap(safe_page, PAGE_SIZE);
        safe_page = NULL;
        unsafe_page = NULL;
    }
}

/* Generate random strings with proper escaping for JSON */
void generate_random_string(char *buffer, size_t max_len) {
    static const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
    size_t len = 1 + rand() % (max_len - 2); /* Ensure at least 1 char */

    for (size_t i = 0; i < len; i++) {
        buffer[i] = charset[rand() % (sizeof(charset) - 1)];
    }
    buffer[len] = '\0';
}

/* Generate random numbers as strings */
void generate_random_number(char *buffer, size_t max_len) {
    double num = (double)rand() / RAND_MAX * 1000.0;

    /* Occasionally make it negative or add decimal places */
    if (rand() % 5 == 0) num = -num;
    if (rand() % 3 != 0) num += (double)(rand() % 100) / 100.0;

    snprintf(buffer, max_len, "%.6g", num);
}

/* Generate a random field name */
void generate_random_field(char *field, size_t *field_len) {
    generate_random_string(field, MAX_FIELD_SIZE / 2);
    *field_len = strlen(field);
}

/* Generate a random JSON object with fields */
char *generate_random_json(size_t *len, char *field, size_t *field_len, int *has_field) {
    char *json = malloc(MAX_JSON_SIZE);
    if (json == NULL) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }

    char buffer[MAX_JSON_SIZE / 4]; /* Buffer for generating values */
    int pos = 0;
    int num_fields = 1 + rand() % 10; /* Random number of fields */
    int target_field_index = rand() % num_fields; /* Which field to return */

    /* Start the JSON object */
    pos += snprintf(json + pos, MAX_JSON_SIZE - pos, "{");

    /* Generate random field/value pairs */
    for (int i = 0; i < num_fields; i++) {
        /* Add a comma if not the first field */
        if (i > 0) {
            pos += snprintf(json + pos, MAX_JSON_SIZE - pos, ", ");
        }

        /* Generate a field name */
        if (i == target_field_index) {
            /* This is our target field - save it for the caller */
            generate_random_field(field, field_len);
            pos += snprintf(json + pos, MAX_JSON_SIZE - pos, "\"%s\": ", field);
            *has_field = 1;
            /* Sometimes change the last char so that it will not match. */
            if (rand() % 2) {
                *has_field = 0;
                field[*field_len-1] = '!';
            }
        } else {
            generate_random_string(buffer, MAX_FIELD_SIZE / 4);
            pos += snprintf(json + pos, MAX_JSON_SIZE - pos, "\"%s\": ", buffer);
        }

        /* Generate a random value type: 0-5, so that the array case
         * below is reachable. */
        int value_type = rand() % 6;
        switch (value_type) {
        case 0: /* String */
            generate_random_string(buffer, MAX_JSON_SIZE / 8);
            pos += snprintf(json + pos, MAX_JSON_SIZE - pos, "\"%s\"", buffer);
            break;

        case 1: /* Number */
            generate_random_number(buffer, MAX_JSON_SIZE / 8);
            pos += snprintf(json + pos, MAX_JSON_SIZE - pos, "%s", buffer);
            break;

        case 2: /* Boolean: true */
            pos += snprintf(json + pos, MAX_JSON_SIZE - pos, "true");
            break;

        case 3: /* Boolean: false */
            pos += snprintf(json + pos, MAX_JSON_SIZE - pos, "false");
            break;

        case 4: /* Null */
            pos += snprintf(json + pos, MAX_JSON_SIZE - pos, "null");
            break;

        case 5: /* Array (simple) */
            pos += snprintf(json + pos, MAX_JSON_SIZE - pos, "[");
            int array_items = 1 + rand() % 5;
            for (int j = 0; j < array_items; j++) {
                if (j > 0) pos += snprintf(json + pos, MAX_JSON_SIZE - pos, ", ");

                /* Array items - either number or string */
                if (rand() % 2) {
                    generate_random_number(buffer, MAX_JSON_SIZE / 16);
                    pos += snprintf(json + pos, MAX_JSON_SIZE - pos, "%s", buffer);
                } else {
                    generate_random_string(buffer, MAX_JSON_SIZE / 16);
                    pos += snprintf(json + pos, MAX_JSON_SIZE - pos, "\"%s\"", buffer);
                }
            }
            pos += snprintf(json + pos, MAX_JSON_SIZE - pos, "]");
            break;
        }
    }

    /* Close the JSON object */
    pos += snprintf(json + pos, MAX_JSON_SIZE - pos, "}");
    *len = pos;

    return json;
}

/* Corrupt JSON by replacing random characters */
void corrupt_json(char *json, size_t len) {
    if (len < 2) return; /* Too short to corrupt safely */

    /* Corrupt 1-3 characters */
    int num_corruptions = 1 + rand() % 3;
    for (int i = 0; i < num_corruptions; i++) {
        size_t pos = rand() % len;
        /* Note: index 29 selects the string's terminating NUL, so a '\0'
         * may also be injected as a corruption. */
        char corruption = " \t\n{}[]\":,0123456789abcdefXYZ"[rand() % 30];
        json[pos] = corruption;
    }
}

/* Run standard parser tests with generated valid JSON */
void run_normal_tests(void) {
    printf("Running normal JSON extraction tests...\n");

    for (int i = 0; i < NUM_TEST_ITERATIONS; i++) {
        char field[MAX_FIELD_SIZE] = {0};
        size_t field_len = 0;
        size_t json_len = 0;
        int has_field = 0;

        /* Generate random JSON */
        char *json = generate_random_json(&json_len, field, &field_len, &has_field);

        /* Use valid field to test parser */
        exprtoken *token = safe_extract_field(json, json_len, field, field_len);

        /* Check if we got a token as expected */
        if (has_field && token != NULL) {
            exprTokenRelease(token);
            tests_passed++;
        } else if (!has_field && token == NULL) {
            tests_passed++;
        } else {
            tests_failed++;
        }

        /* Test with a non-existent field */
        char nonexistent_field[MAX_FIELD_SIZE] = "nonexistent_field";
        token = safe_extract_field(json, json_len, nonexistent_field, strlen(nonexistent_field));

        if (token == NULL) {
            tests_passed++;
        } else {
            exprTokenRelease(token);
            tests_failed++;
        }

        free(json);
    }
}

/* Run tests with corrupted JSON */
void run_corruption_tests(void) {
    printf("Running JSON corruption tests...\n");

    for (int i = 0; i < NUM_CORRUPTION_TESTS; i++) {
        char field[MAX_FIELD_SIZE] = {0};
        size_t field_len = 0;
        size_t json_len = 0;
        int has_field = 0;

        /* Generate random JSON */
        char *json = generate_random_json(&json_len, field, &field_len, &has_field);

        /* Make a copy and corrupt it */
        char *corrupted = malloc(json_len + 1);
        if (!corrupted) {
            perror("malloc");
            free(json);
            exit(EXIT_FAILURE);
        }

        memcpy(corrupted, json, json_len + 1);
        corrupt_json(corrupted, json_len);

        /* Test with corrupted JSON */
        exprtoken *token = safe_extract_field(corrupted, json_len, field, field_len);

        /* We're just testing that it doesn't crash or access invalid memory */
        if (boundary_violation) {
            printf("Boundary violation with corrupted JSON!\n");
            tests_failed++;
        } else {
            if (token != NULL) {
                exprTokenRelease(token);
            }
            corruptions_passed++;
        }

        free(corrupted);
        free(json);
    }
}

/* Run tests at memory boundaries */
void run_boundary_tests(void) {
    printf("Running memory boundary tests...\n");

    for (int i = 0; i < NUM_BOUNDARY_TESTS; i++) {
        char field[MAX_FIELD_SIZE] = {0};
        size_t field_len = 0;
        size_t json_len = 0;
        int has_field = 0;

        /* Generate random JSON */
        char *temp_json = generate_random_json(&json_len, field, &field_len, &has_field);

        /* Truncate the JSON to a random length */
        size_t truncated_len = 1 + rand() % json_len;

        /* Place at the edge of the safe page */
        size_t offset = PAGE_SIZE - truncated_len;
        memcpy(safe_page + offset, temp_json, truncated_len);

        /* Test parsing with non-existent field (forcing it to scan to end) */
        char nonexistent_field[MAX_FIELD_SIZE] = "nonexistent_field";
        exprtoken *token = safe_extract_field(safe_page + offset, truncated_len,
                                              nonexistent_field, strlen(nonexistent_field));

        /* We're just testing that it doesn't access memory beyond the boundary */
        if (boundary_violation) {
            printf("Boundary violation at edge of memory page!\n");
            tests_failed++;
        } else {
            if (token != NULL) {
                exprTokenRelease(token);
            }
            boundary_tests_passed++;
        }

        free(temp_json);
    }
}

/* Print summary of test results */
void print_test_summary(void) {
    printf("\n===== FASTJSON PARSER TEST SUMMARY =====\n");
    printf("Normal tests passed: %d/%d\n", tests_passed, NUM_TEST_ITERATIONS * 2);
    printf("Corruption tests passed: %d/%d\n", corruptions_passed, NUM_CORRUPTION_TESTS);
    printf("Boundary tests passed: %d/%d\n", boundary_tests_passed, NUM_BOUNDARY_TESTS);
    printf("Failed tests: %d\n", tests_failed);

    if (tests_failed == 0) {
        printf("\nALL TESTS PASSED! The JSON parser appears to be robust.\n");
    } else {
        printf("\nSome tests FAILED. The JSON parser may be vulnerable.\n");
    }
}

/* Entry point for fastjson parser test */
void run_fastjson_test(void) {
    printf("Starting fastjson parser stress test...\n");

    /* Seed the random number generator */
    srand(time(NULL));

    /* Setup test memory environment */
    setup_test_memory();

    /* Run the various test phases */
    run_normal_tests();
    run_corruption_tests();
    run_boundary_tests();

    /* Print summary */
    print_test_summary();

    /* Cleanup */
    cleanup_test_memory();
}

File diff suppressed because it is too large

@ -0,0 +1,189 @@

/*
 * HNSW (Hierarchical Navigable Small World) Implementation
 * Based on the paper by Yu. A. Malkov, D. A. Yashunin
 *
 * Copyright (c) 2009-Present, Redis Ltd.
 * All rights reserved.
 *
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 * Originally authored by: Salvatore Sanfilippo.
 */

#ifndef HNSW_H
#define HNSW_H

#include <stddef.h>    /* size_t, used by the allocator hooks below. */
#include <stdint.h>    /* uint32_t, uint64_t, int8_t. */
#include <pthread.h>
#include <stdatomic.h>

#define HNSW_DEFAULT_M 16   /* Used when 0 is given at creation time. */
#define HNSW_MIN_M 4        /* Probably even too low already. */
#define HNSW_MAX_M 4096     /* Safeguard sanity limit. */
#define HNSW_MAX_THREADS 32 /* Maximum number of concurrent threads */

/* Quantization types you can enable at creation time in hnsw_new() */
#define HNSW_QUANT_NONE 0 // No quantization.
#define HNSW_QUANT_Q8 1   // Q8 quantization.
#define HNSW_QUANT_BIN 2  // Binary quantization.

/* Layer structure for HNSW nodes. Each node will have from one to a few
 * of these, depending on its level. */
typedef struct {
    struct hnswNode **links; /* Array of neighbors for this layer */
    uint32_t num_links;      /* Number of used links */
    uint32_t max_links;      /* Maximum links for this layer. We may
                              * reallocate the node in very particular
                              * conditions in order to allow linking of
                              * new inserted nodes, so this may change
                              * dynamically and be > M*2 for a small set of
                              * nodes. */
    float worst_distance;    /* Distance to the worst neighbor */
    uint32_t worst_idx;      /* Index of the worst neighbor */
} hnswNodeLayer;

/* Node structure for HNSW graph */
typedef struct hnswNode {
    uint32_t level;     /* Node's maximum level */
    uint64_t id;        /* Unique identifier, may be useful in order to
                         * have a bitmap of visited nodes to use as
                         * alternative to epoch / visited_epoch.
                         * Also used in serialization in order to retain
                         * links specifying IDs. */
    void *vector;       /* The vector, quantized or not. */
    float quants_range; /* Quantization range for this vector:
                         * min/max values will be in the range
                         * -quants_range, +quants_range */
    float l2;           /* L2 before normalization. */

    /* Last time (epoch) this node was visited. We need one per thread.
     * This avoids having a different data structure where we track
     * visited nodes, but costs memory per node. */
    uint64_t visited_epoch[HNSW_MAX_THREADS];

    void *value;        /* Associated value */
    struct hnswNode *prev, *next; /* Prev/Next node in the list starting at
                                   * HNSW->head. */

    /* Links (and links info) per each layer. Note that this is part
     * of the node allocation to be more cache friendly: reliable 3% speedup
     * on Apple silicon, and does not make anything more complex. */
    hnswNodeLayer layers[];
} hnswNode;

struct HNSW;

/* It is possible to navigate an HNSW with a cursor that guarantees
 * visiting all the elements that remain in the HNSW from the start to the
 * end of the process (but not the new ones, so that the process will
 * eventually finish). Check hnsw_cursor_init(), hnsw_cursor_next() and
 * hnsw_cursor_free(). */
typedef struct hnswCursor {
    struct HNSW *index; // Reference to the index of this cursor.
    hnswNode *current;  // Element to report when hnsw_cursor_next() is called.
    struct hnswCursor *next; // Next cursor active.
} hnswCursor;

/* Main HNSW index structure */
typedef struct HNSW {
    hnswNode *enter_point;  /* Entry point for the graph */
    uint32_t M;             /* M as in the paper: layer 0 has M*2 max
                             * neighbors (M populated at insertion time)
                             * while all the other layers have M neighbors. */
    uint32_t max_level;     /* Current maximum level in the graph */
    uint32_t vector_dim;    /* Dimensionality of stored vectors */
    uint64_t node_count;    /* Total number of nodes */
    _Atomic uint64_t last_id; /* Last node ID used */
    uint64_t current_epoch[HNSW_MAX_THREADS]; /* Current epoch for visit tracking */
    hnswNode *head;         /* Linked list of nodes. Last first */

    /* We have two locks here:
     * 1. A global_lock that is used to perform write operations blocking all
     *    the readers.
     * 2. One mutex per epoch slot, in order for read operations to acquire
     *    a lock on a specific slot to use epochs tracking of visited nodes. */
    pthread_rwlock_t global_lock;                 /* Global read-write lock */
    pthread_mutex_t slot_locks[HNSW_MAX_THREADS]; /* Per-slot locks */

    _Atomic uint32_t next_slot; /* Next thread slot to try */
    _Atomic uint64_t version;   /* Version for optimistic concurrency, this is
                                 * incremented on deletions and entry point
                                 * updates. */
    uint32_t quant_type;        /* Quantization used. HNSW_QUANT_... */
    hnswCursor *cursors;
} HNSW;

/* Serialized node. This structure is used as return value of
 * hnsw_serialize_node(). */
typedef struct hnswSerNode {
    void *vector;
    uint32_t vector_size;
    uint64_t *params;
    uint32_t params_count;
} hnswSerNode;

/* Insert preparation context */
typedef struct InsertContext InsertContext;

/* Core HNSW functions */
HNSW *hnsw_new(uint32_t vector_dim, uint32_t quant_type, uint32_t m);
void hnsw_free(HNSW *index,void(*free_value)(void*value));
void hnsw_node_free(hnswNode *node);
void hnsw_print_stats(HNSW *index);
hnswNode *hnsw_insert(HNSW *index, const float *vector, const int8_t *qvector,
                      float qrange, uint64_t id, void *value, int ef);
int hnsw_search(HNSW *index, const float *query, uint32_t k,
                hnswNode **neighbors, float *distances, uint32_t slot,
                int query_vector_is_normalized);
int hnsw_search_with_filter
    (HNSW *index, const float *query_vector, uint32_t k,
     hnswNode **neighbors, float *distances, uint32_t slot,
     int query_vector_is_normalized,
     int (*filter_callback)(void *value, void *privdata),
     void *filter_privdata, uint32_t max_candidates);
void hnsw_get_node_vector(HNSW *index, hnswNode *node, float *vec);
int hnsw_delete_node(HNSW *index, hnswNode *node, void(*free_value)(void*value));
hnswNode *hnsw_random_node(HNSW *index, int slot);

/* Thread safety functions. */
int hnsw_acquire_read_slot(HNSW *index);
void hnsw_release_read_slot(HNSW *index, int slot);
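
/* Typical usage sketch (illustrative only: names like `vector`, `query`
 * and `my_value` are placeholders, and error handling is omitted). An
 * index is created, a vector is inserted, and a k-NN query runs while
 * holding a read slot:
 *
 *   HNSW *idx = hnsw_new(128, HNSW_QUANT_NONE, 0); // 0 selects HNSW_DEFAULT_M.
 *   hnsw_insert(idx, vector, NULL, 0, 1, my_value, 200); // ef = 200.
 *   int slot = hnsw_acquire_read_slot(idx);
 *   hnswNode *neighbors[10];
 *   float distances[10];
 *   int found = hnsw_search(idx, query, 10, neighbors, distances, slot, 0);
 *   hnsw_release_read_slot(idx, slot);
 *   hnsw_free(idx, NULL);
 */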

/* Optimistic insertion API. */
InsertContext *hnsw_prepare_insert(HNSW *index, const float *vector, const int8_t *qvector, float qrange, uint64_t id, int ef);
hnswNode *hnsw_try_commit_insert(HNSW *index, InsertContext *ctx, void *value);
void hnsw_free_insert_context(InsertContext *ctx);

/* Serialization. */
hnswSerNode *hnsw_serialize_node(HNSW *index, hnswNode *node);
void hnsw_free_serialized_node(hnswSerNode *sn);
hnswNode *hnsw_insert_serialized(HNSW *index, void *vector, uint64_t *params, uint32_t params_len, void *value);
int hnsw_deserialize_index(HNSW *index);

// Helper function in case the user wants to directly copy
// the vector bytes.
uint32_t hnsw_quants_bytes(HNSW *index);

/* Cursors. */
hnswCursor *hnsw_cursor_init(HNSW *index);
void hnsw_cursor_free(hnswCursor *cursor);
hnswNode *hnsw_cursor_next(hnswCursor *cursor);
int hnsw_cursor_acquire_lock(hnswCursor *cursor);
void hnsw_cursor_release_lock(hnswCursor *cursor);
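
/* Cursor usage sketch (illustrative; the exact locking discipline is
 * defined by the implementation in hnsw.c, we only assume here that a
 * nonzero return from hnsw_cursor_acquire_lock() means the lock was
 * taken):
 *
 *   hnswCursor *c = hnsw_cursor_init(idx);
 *   while (hnsw_cursor_acquire_lock(c)) {
 *       hnswNode *node = hnsw_cursor_next(c);
 *       hnsw_cursor_release_lock(c);
 *       if (node == NULL) break; // Iteration finished.
 *       // ... inspect node->value here ...
 *   }
 *   hnsw_cursor_free(c);
 */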

/* Allocator selection. */
void hnsw_set_allocator(void (*free_ptr)(void*), void *(*malloc_ptr)(size_t),
                        void *(*realloc_ptr)(void*, size_t));

/* Testing. */
int hnsw_validate_graph(HNSW *index, uint64_t *connected_nodes, int *reciprocal_links);
void hnsw_test_graph_recall(HNSW *index, int test_ef, int verbose);
float hnsw_distance(HNSW *index, hnswNode *a, hnswNode *b);
int hnsw_ground_truth_with_filter
    (HNSW *index, const float *query_vector, uint32_t k,
     hnswNode **neighbors, float *distances, uint32_t slot,
     int query_vector_is_normalized,
     int (*filter_callback)(void *value, void *privdata),
     void *filter_privdata);

#endif /* HNSW_H */

@ -0,0 +1,282 @@

#!/usr/bin/env python3
#
# Vector set tests.
# A Redis instance should be running on the default port.
#
# Copyright (c) 2009-Present, Redis Ltd.
# All rights reserved.
#
# Licensed under your choice of (a) the Redis Source Available License 2.0
# (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
# GNU Affero General Public License v3 (AGPLv3).
#

import redis
import random
import struct
import math
import time
import sys
import os
import importlib
import inspect
import argparse
from typing import List, Tuple, Optional
from dataclasses import dataclass

def colored(text: str, color: str) -> str:
    colors = {
        'red': '\033[91m',
        'green': '\033[92m',
        'yellow': '\033[93m'
    }
    reset = '\033[0m'
    return f"{colors.get(color, '')}{text}{reset}"

@dataclass
class VectorData:
    vectors: List[List[float]]
    names: List[str]

    def find_k_nearest(self, query_vector: List[float], k: int) -> List[Tuple[str, float]]:
        """Find k-nearest neighbors using the same scoring as Redis VSIM WITHSCORES."""
        similarities = []
        query_norm = math.sqrt(sum(x*x for x in query_vector))
        if query_norm == 0:
            return []

        for i, vec in enumerate(self.vectors):
            vec_norm = math.sqrt(sum(x*x for x in vec))
            if vec_norm == 0:
                continue

            dot_product = sum(a*b for a,b in zip(query_vector, vec))
            cosine_sim = dot_product / (query_norm * vec_norm)
            distance = 1.0 - cosine_sim
            redis_similarity = 1.0 - (distance/2.0)
            similarities.append((self.names[i], redis_similarity))

        similarities.sort(key=lambda x: x[1], reverse=True)
        return similarities[:k]
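
# Worked example of the scoring above (illustrative): for query q = [1, 0]
# and vector v = [0, 1], cosine similarity is 0, distance is 1 - 0 = 1, and
# the reported score is 1 - (1/2) = 0.5. Identical vectors score 1.0,
# orthogonal vectors 0.5, and opposite vectors 0.0, matching what Redis
# VSIM WITHSCORES returns.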

def generate_random_vector(dim: int) -> List[float]:
    """Generate a random normalized vector."""
    vec = [random.gauss(0, 1) for _ in range(dim)]
    norm = math.sqrt(sum(x*x for x in vec))
    return [x/norm for x in vec]

def fill_redis_with_vectors(r: redis.Redis, key: str, count: int, dim: int,
                            with_reduce: Optional[int] = None) -> VectorData:
    """Fill Redis with random vectors and return a VectorData object for verification."""
    vectors = []
    names = []

    r.delete(key)
    for i in range(count):
        vec = generate_random_vector(dim)
        name = f"{key}:item:{i}"
        vectors.append(vec)
        names.append(name)

        vec_bytes = struct.pack(f'{dim}f', *vec)
        args = [key]
        if with_reduce:
            args.extend(['REDUCE', with_reduce])
        args.extend(['FP32', vec_bytes, name])
        r.execute_command('VADD', *args)

    return VectorData(vectors=vectors, names=names)
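
# Typical flow in the tests below (sketch): fill a key with known vectors,
# then compare the server's VSIM results against the brute-force ground
# truth computed by VectorData.find_k_nearest(), e.g.:
#
#   data = fill_redis_with_vectors(r, "test:key", 100, 8)
#   expected = data.find_k_nearest(data.vectors[0], 10)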

class TestCase:
    def __init__(self, primary_port=6379, replica_port=6380):
        self.error_msg = None
        self.error_details = None
        self.test_key = f"test:{self.__class__.__name__.lower()}"
        # Primary Redis instance
        self.redis = redis.Redis(port=primary_port)
        # Replica Redis instance
        self.replica = redis.Redis(port=replica_port)
        # Replication status
        self.replication_setup = False
        # Ports
        self.primary_port = primary_port
        self.replica_port = replica_port

    def setup(self):
        self.redis.delete(self.test_key)

    def teardown(self):
        self.redis.delete(self.test_key)

    def setup_replication(self) -> bool:
        """
        Setup replication between primary and replica Redis instances.
        Returns True if replication is successfully established, False otherwise.
        """
        # Configure replica to replicate from primary
        self.replica.execute_command('REPLICAOF', '127.0.0.1', self.primary_port)

        # Wait for replication to be established
        max_attempts = 10
        for attempt in range(max_attempts):
            # Check replication info
            repl_info = self.replica.info('replication')

            # Check if replication is established
            if (repl_info.get('role') == 'slave' and
                repl_info.get('master_host') == '127.0.0.1' and
                repl_info.get('master_port') == self.primary_port and
                repl_info.get('master_link_status') == 'up'):

                self.replication_setup = True
                return True

            # Wait before next attempt
            time.sleep(0.5)

        # If we get here, replication wasn't established
        self.error_msg = "Failed to establish replication between primary and replica"
        return False

    def test(self):
        raise NotImplementedError("Subclasses must implement test method")

    def run(self):
        try:
            self.setup()
            self.test()
            return True
        except AssertionError as e:
            self.error_msg = str(e)
            import traceback
            self.error_details = traceback.format_exc()
            return False
        except Exception as e:
            self.error_msg = f"Unexpected error: {str(e)}"
            import traceback
            self.error_details = traceback.format_exc()
            return False
        finally:
            self.teardown()

    def getname(self):
        """Each test class should override this to provide its name"""
        return self.__class__.__name__

    def estimated_runtime(self):
        """Each test class should override this if it takes a significant amount of time to run. Default is 100ms"""
        return 0.1

def find_test_classes(primary_port, replica_port):
    test_classes = []
    tests_dir = 'tests'

    if not os.path.exists(tests_dir):
        return []

    for file in os.listdir(tests_dir):
        if file.endswith('.py'):
            module_name = f"tests.{file[:-3]}"
            try:
                module = importlib.import_module(module_name)
                for name, obj in inspect.getmembers(module):
                    if inspect.isclass(obj) and obj.__name__ != 'TestCase' and hasattr(obj, 'test'):
                        # Create test instance with specified ports
                        test_instance = obj()
                        test_instance.redis = redis.Redis(port=primary_port)
                        test_instance.replica = redis.Redis(port=replica_port)
                        test_instance.primary_port = primary_port
                        test_instance.replica_port = replica_port
                        test_classes.append(test_instance)
            except Exception as e:
                print(f"Error loading {file}: {e}")

    return test_classes

def check_redis_empty(r, instance_name):
    """Check if Redis instance is empty"""
    try:
        dbsize = r.dbsize()
        if dbsize > 0:
            print(colored(f"ERROR: {instance_name} Redis instance is not empty (dbsize: {dbsize}).", "red"))
            print(colored("Make sure you're not using a production instance and that all data is safe to delete.", "red"))
            sys.exit(1)
    except redis.exceptions.ConnectionError:
        print(colored(f"ERROR: Cannot connect to {instance_name} Redis instance.", "red"))
        sys.exit(1)

def check_replica_running(replica_port):
    """Check if replica Redis instance is running"""
    r = redis.Redis(port=replica_port)
    try:
        r.ping()
        return True
    except redis.exceptions.ConnectionError:
        print(colored(f"WARNING: Replica Redis instance (port {replica_port}) is not running.", "yellow"))
        print(colored("Replication tests will fail. Make sure to start the replica instance.", "yellow"))
        return False

def run_tests():
    # Parse command line arguments
    parser = argparse.ArgumentParser(description='Run Redis vector tests.')
    parser.add_argument('--primary-port', type=int, default=6379, help='Primary Redis instance port (default: 6379)')
    parser.add_argument('--replica-port', type=int, default=6380, help='Replica Redis instance port (default: 6380)')
    args = parser.parse_args()

    print("================================================")
    print("Make sure to have Redis running on localhost")
    print(f"Primary port: {args.primary_port}")
    print(f"Replica port: {args.replica_port}")
    print("with --enable-debug-command yes")
    print("================================================\n")

    # Check if Redis instances are empty
    primary = redis.Redis(port=args.primary_port)
    replica = redis.Redis(port=args.replica_port)

    check_redis_empty(primary, "Primary")

    # Check if replica is running
    replica_running = check_replica_running(args.replica_port)
    if replica_running:
        check_redis_empty(replica, "Replica")

    tests = find_test_classes(args.primary_port, args.replica_port)
    if not tests:
        print("No tests found!")
        return

    # Sort tests by estimated runtime
    tests.sort(key=lambda t: t.estimated_runtime())

    passed = 0
    total = len(tests)

    for test in tests:
        print(f"{test.getname()}: ", end="")
        sys.stdout.flush()

        start_time = time.time()
        success = test.run()
        duration = time.time() - start_time

        if success:
            print(colored("OK", "green"), f"({duration:.2f}s)")
            passed += 1
        else:
            print(colored("ERR", "red"), f"({duration:.2f}s)")
            print(f"Error: {test.error_msg}")
            if test.error_details:
                print("\nTraceback:")
                print(test.error_details)

    print("\n" + "="*50)
    print(f"\nTest Summary: {passed}/{total} tests passed")

    if passed == total:
        print(colored("\nALL TESTS PASSED!", "green"))
    else:
        print(colored(f"\n{total-passed} TESTS FAILED!", "red"))

if __name__ == "__main__":
    run_tests()

@ -0,0 +1,21 @@

from test import TestCase, generate_random_vector
import struct

class BasicCommands(TestCase):
    def getname(self):
        return "VADD, VDIM, VCARD basic usage"

    def test(self):
        # Test VADD
        vec = generate_random_vector(4)
        vec_bytes = struct.pack('4f', *vec)
        result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')
        assert result == 1, "VADD should return 1 for first item"

        # Test VDIM
        dim = self.redis.execute_command('VDIM', self.test_key)
        assert dim == 4, f"VDIM should return 4, got {dim}"

        # Test VCARD
        card = self.redis.execute_command('VCARD', self.test_key)
        assert card == 1, f"VCARD should return 1, got {card}"

@ -0,0 +1,35 @@

from test import TestCase

class BasicSimilarity(TestCase):
    def getname(self):
        return "VSIM reported distance makes sense with 4D vectors"

    def test(self):
        # Add two very similar vectors, one different
        vec1 = [1, 0, 0, 0]
        vec2 = [0.99, 0.01, 0, 0]
        vec3 = [0.1, 1, -1, 0.5]

        # Add vectors using VALUES format
        self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
                                   *[str(x) for x in vec1], f'{self.test_key}:item:1')
        self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
                                   *[str(x) for x in vec2], f'{self.test_key}:item:2')
        self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
                                   *[str(x) for x in vec3], f'{self.test_key}:item:3')

        # Query similarity with vec1
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1], 'WITHSCORES')

        # Convert results to dictionary
        results_dict = {}
        for i in range(0, len(result), 2):
            key = result[i].decode()
            score = float(result[i+1])
            results_dict[key] = score

        # Verify results
        assert results_dict[f'{self.test_key}:item:1'] > 0.99, "Self-similarity should be very high"
        assert results_dict[f'{self.test_key}:item:2'] > 0.99, "Similar vector should have high similarity"
        assert results_dict[f'{self.test_key}:item:3'] < 0.8, "Not very similar vector should have low similarity"

@ -0,0 +1,156 @@

from test import TestCase, generate_random_vector
import threading
import time
import struct

class ThreadingStressTest(TestCase):
    def getname(self):
        return "Concurrent VADD/DEL/VSIM operations stress test"

    def estimated_runtime(self):
        return 10  # Test runs for 10 seconds

    def test(self):
        # Constants - easy to modify if needed
        NUM_VADD_THREADS = 10
        NUM_VSIM_THREADS = 1
        NUM_DEL_THREADS = 1
        TEST_DURATION = 10  # seconds
        VECTOR_DIM = 100
        DEL_INTERVAL = 1  # seconds

        # Shared flags and state
        stop_event = threading.Event()
        error_list = []
        error_lock = threading.Lock()

        def log_error(thread_name, error):
            with error_lock:
                error_list.append(f"{thread_name}: {error}")

        def vadd_worker(thread_id):
            """Thread function to perform VADD operations"""
            thread_name = f"VADD-{thread_id}"
            try:
                vector_count = 0
                while not stop_event.is_set():
                    try:
                        # Generate random vector
                        vec = generate_random_vector(VECTOR_DIM)
                        vec_bytes = struct.pack(f'{VECTOR_DIM}f', *vec)

                        # Add vector with CAS option
                        self.redis.execute_command(
                            'VADD',
                            self.test_key,
                            'FP32',
                            vec_bytes,
                            f'{self.test_key}:item:{thread_id}:{vector_count}',
                            'CAS'
                        )

                        vector_count += 1

                        # Small sleep to reduce CPU pressure
                        if vector_count % 10 == 0:
                            time.sleep(0.001)
                    except Exception as e:
                        log_error(thread_name, f"Error: {str(e)}")
                        time.sleep(0.1)  # Slight backoff on error
            except Exception as e:
                log_error(thread_name, f"Thread error: {str(e)}")

        def del_worker():
            """Thread function that deletes the key periodically"""
            thread_name = "DEL"
            try:
                del_count = 0
                while not stop_event.is_set():
                    try:
                        # Sleep first, then delete
                        time.sleep(DEL_INTERVAL)
                        if stop_event.is_set():
                            break

                        self.redis.delete(self.test_key)
                        del_count += 1
                    except Exception as e:
                        log_error(thread_name, f"Error: {str(e)}")
            except Exception as e:
                log_error(thread_name, f"Thread error: {str(e)}")

        def vsim_worker(thread_id):
            """Thread function to perform VSIM operations"""
            thread_name = f"VSIM-{thread_id}"
            try:
                search_count = 0
                while not stop_event.is_set():
                    try:
                        # Generate query vector
                        query_vec = generate_random_vector(VECTOR_DIM)
                        query_str = [str(x) for x in query_vec]

                        # Perform similarity search
                        args = ['VSIM', self.test_key, 'VALUES', VECTOR_DIM]
                        args.extend(query_str)
                        args.extend(['COUNT', 10])
                        self.redis.execute_command(*args)

                        search_count += 1

                        # Small sleep to reduce CPU pressure
                        if search_count % 10 == 0:
                            time.sleep(0.005)
                    except Exception as e:
                        # Don't log empty array errors, as they're expected when key doesn't exist
                        if "empty array" not in str(e).lower():
                            log_error(thread_name, f"Error: {str(e)}")
                        time.sleep(0.1)  # Slight backoff on error
            except Exception as e:
                log_error(thread_name, f"Thread error: {str(e)}")

        # Start all threads
        threads = []

        # VADD threads
        for i in range(NUM_VADD_THREADS):
            thread = threading.Thread(target=vadd_worker, args=(i,))
            thread.start()
            threads.append(thread)

        # DEL threads
        for _ in range(NUM_DEL_THREADS):
            thread = threading.Thread(target=del_worker)
            thread.start()
            threads.append(thread)

        # VSIM threads
        for i in range(NUM_VSIM_THREADS):
            thread = threading.Thread(target=vsim_worker, args=(i,))
            thread.start()
            threads.append(thread)

        # Let the test run for the specified duration
        time.sleep(TEST_DURATION)

        # Signal all threads to stop
        stop_event.set()

        # Wait for threads to finish
        for thread in threads:
            thread.join(timeout=2.0)

        # Check if Redis is still responsive
        try:
            ping_result = self.redis.ping()
            assert ping_result, "Redis did not respond to PING after stress test"
        except Exception as e:
            assert False, f"Redis connection failed after stress test: {str(e)}"

        # Report any errors for diagnosis, but don't fail the test unless PING fails
        if error_list:
            error_count = len(error_list)
            print(f"\nEncountered {error_count} errors during stress test.")
            print("First 5 errors:")
            for error in error_list[:5]:
                print(f"- {error}")

@ -0,0 +1,48 @@

from test import TestCase, fill_redis_with_vectors, generate_random_vector
import threading, time

class ConcurrentVSIMAndDEL(TestCase):
    def getname(self):
        return "Concurrent VSIM and DEL operations"

    def estimated_runtime(self):
        return 2

    def test(self):
        # Fill the key with 5000 random vectors
        dim = 128
        count = 5000
        fill_redis_with_vectors(self.redis, self.test_key, count, dim)

        # List to store results from threads
        thread_results = []

        def vsim_thread():
            """Thread function to perform VSIM operations until the key is deleted"""
            while True:
                query_vec = generate_random_vector(dim)
                result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
                                                    *[str(x) for x in query_vec], 'COUNT', 10)
                if not result:
                    # Empty array detected, key is deleted
                    thread_results.append(True)
                    break

        # Start multiple threads to perform VSIM operations
        threads = []
        for _ in range(4):  # Start 4 threads
            t = threading.Thread(target=vsim_thread)
            t.start()
            threads.append(t)

        # Delete the key while threads are still running
        time.sleep(1)
        self.redis.delete(self.test_key)

        # Wait for all threads to finish (they will exit once they detect the key is deleted)
        for t in threads:
            t.join()

        # Verify that all threads detected an empty array or error
        assert len(thread_results) == len(threads), "Not all threads detected the key deletion"
        assert all(thread_results), "Some threads did not detect an empty array or error after DEL"

@ -0,0 +1,39 @@

from test import TestCase, generate_random_vector
import struct

class DebugDigestTest(TestCase):
    def getname(self):
        return "[regression] DEBUG DIGEST-VALUE with attributes"

    def test(self):
        # Generate random vectors
        vec1 = generate_random_vector(4)
        vec2 = generate_random_vector(4)
        vec_bytes1 = struct.pack('4f', *vec1)
        vec_bytes2 = struct.pack('4f', *vec2)

        # Add vectors to the key, one with attribute, one without
        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes1, f'{self.test_key}:item:1')
        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes2, f'{self.test_key}:item:2', 'SETATTR', '{"color":"red"}')

        # Call DEBUG DIGEST-VALUE on the key
        try:
            digest1 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
            assert digest1 is not None, "DEBUG DIGEST-VALUE should return a value"

            # Change attribute and verify digest changes
            self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2', '{"color":"blue"}')

            digest2 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
            assert digest2 is not None, "DEBUG DIGEST-VALUE should return a value after attribute change"
            assert digest1 != digest2, "Digest should change when an attribute is modified"

            # Remove attribute and verify digest changes again
            self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2', '')

            digest3 = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
            assert digest3 is not None, "DEBUG DIGEST-VALUE should return a value after attribute removal"
            assert digest2 != digest3, "Digest should change when an attribute is removed"

        except Exception as e:
            raise AssertionError(f"DEBUG DIGEST-VALUE command failed: {str(e)}")

@ -0,0 +1,173 @@

from test import TestCase, fill_redis_with_vectors, generate_random_vector
import random

"""
A note about this test:
We experimentally modified hnsw.c so that hnsw_reconnect_nodes()
is never called. In that case the test fails very often with EF
set to 250, while it hardly ever fails with the same parameters
when hnsw_reconnect_nodes() is called.

Note that given the nature of the test (it is very strict) it can
still fail from time to time without this signaling any
actual bug.
"""

class VREM(TestCase):
    def getname(self):
        return "Deletion and graph state after deletion"

    def estimated_runtime(self):
        return 2.0

    def format_neighbors_with_scores(self, links_result, old_links=None, items_to_remove=None):
        """Format neighbors with their similarity scores and status indicators"""
        if not links_result:
            return "No neighbors"

        output = []
        for level, neighbors in enumerate(links_result):
            level_num = len(links_result) - level - 1
            output.append(f"Level {level_num}:")

            # Get neighbors and scores
            neighbors_with_scores = []
            for i in range(0, len(neighbors), 2):
                neighbor = neighbors[i].decode() if isinstance(neighbors[i], bytes) else neighbors[i]
                score = float(neighbors[i+1]) if i+1 < len(neighbors) else None
                status = ""

                # For old links, mark deleted ones
                if items_to_remove and neighbor in items_to_remove:
                    status = " [lost]"
                # For new links, mark newly added ones
                elif old_links is not None:
                    # Check if this neighbor was in the old links at this level
                    was_present = False
                    if old_links and level < len(old_links):
                        old_neighbors = [n.decode() if isinstance(n, bytes) else n
                                         for n in old_links[level]]
                        was_present = neighbor in old_neighbors
                    if not was_present:
                        status = " [gained]"

                if score is not None:
                    neighbors_with_scores.append(f"{len(neighbors_with_scores)+1}. {neighbor} ({score:.6f}){status}")
                else:
                    neighbors_with_scores.append(f"{len(neighbors_with_scores)+1}. {neighbor}{status}")

            output.extend(["  " + n for n in neighbors_with_scores])
        return "\n".join(output)

    def test(self):
        # 1. Fill server with random elements
        dim = 128
        count = 5000
        data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)

        # 2. Do VSIM to get 200 items
        query_vec = generate_random_vector(dim)
        results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
                                             *[str(x) for x in query_vec],
                                             'COUNT', 200, 'WITHSCORES')

        # Convert results to list of (item, score) pairs, sorted by score
        items = []
        for i in range(0, len(results), 2):
            item = results[i].decode()
            score = float(results[i+1])
            items.append((item, score))
        items.sort(key=lambda x: x[1], reverse=True)  # Sort by similarity

        # Store the graph structure for all items before deletion
        neighbors_before = {}
        for item, _ in items:
            links = self.redis.execute_command('VLINKS', self.test_key, item, 'WITHSCORES')
            if links:  # Some items might not have links
                neighbors_before[item] = links

        # 3. Remove 100 random items
        items_to_remove = set(item for item, _ in random.sample(items, 100))
        # Keep track of top 10 non-removed items
        top_remaining = []
        for item, score in items:
            if item not in items_to_remove:
                top_remaining.append((item, score))
                if len(top_remaining) == 10:
                    break

        # Remove the items
        for item in items_to_remove:
            result = self.redis.execute_command('VREM', self.test_key, item)
            assert result == 1, f"VREM failed to remove {item}"

        # 4. Do VSIM again with same vector
        new_results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
                                                 *[str(x) for x in query_vec],
                                                 'COUNT', 200, 'WITHSCORES',
                                                 'EF', 500)

        # Convert new results to dict of item -> score
        new_scores = {}
        for i in range(0, len(new_results), 2):
            item = new_results[i].decode()
            score = float(new_results[i+1])
            new_scores[item] = score

        failure = False
        failed_item = None
        failed_reason = None
        # 5. Verify all top 10 non-removed items are still found with similar scores
        for item, old_score in top_remaining:
            if item not in new_scores:
                failure = True
                failed_item = item
                failed_reason = "missing"
                break
            new_score = new_scores[item]
            if abs(new_score - old_score) >= 0.01:
                failure = True
                failed_item = item
                failed_reason = f"score changed: {old_score:.6f} -> {new_score:.6f}"
                break

        if failure:
            print("\nTest failed!")
            print(f"Problem with item: {failed_item} ({failed_reason})")

            print("\nOriginal neighbors (with similarity scores):")
            if failed_item in neighbors_before:
                print(self.format_neighbors_with_scores(
                    neighbors_before[failed_item],
                    items_to_remove=items_to_remove))
            else:
                print("No neighbors found in original graph")

            print("\nCurrent neighbors (with similarity scores):")
            current_links = self.redis.execute_command('VLINKS', self.test_key,
                                                       failed_item, 'WITHSCORES')
            if current_links:
                print(self.format_neighbors_with_scores(
                    current_links,
                    old_links=neighbors_before.get(failed_item)))
            else:
                print("No neighbors in current graph")

            print("\nOriginal results (top 20):")
            for item, score in items[:20]:
                deleted = "[deleted]" if item in items_to_remove else ""
                print(f"{item}: {score:.6f} {deleted}")

            print("\nNew results after removal (top 20):")
            new_items = []
            for i in range(0, len(new_results), 2):
                item = new_results[i].decode()
                score = float(new_results[i+1])
                new_items.append((item, score))
            new_items.sort(key=lambda x: x[1], reverse=True)
            for item, score in new_items[:20]:
                print(f"{item}: {score:.6f}")

            raise AssertionError(f"Test failed: Problem with item {failed_item} ({failed_reason}). *** IMPORTANT *** This test may fail from time to time without indicating that there is a bug. However normally it should pass. The fact is that it's a quite extreme test where we destroy 50% of nodes of top results and still expect perfect recall, with vectors that are very hostile because of the distribution used.")

@ -0,0 +1,67 @@

from test import TestCase, generate_random_vector
import struct
import redis.exceptions

class DimensionValidation(TestCase):
    def getname(self):
        return "[regression] Dimension Validation with Projection"

    def estimated_runtime(self):
        return 0.5

    def test(self):
        # Test scenario 1: Create a set with projection
        original_dim = 100
        reduced_dim = 50

        # Create the initial vector and set with projection
        vec1 = generate_random_vector(original_dim)
        vec1_bytes = struct.pack(f'{original_dim}f', *vec1)

        # Add first vector with projection
        result = self.redis.execute_command('VADD', self.test_key,
                                            'REDUCE', reduced_dim,
                                            'FP32', vec1_bytes, f'{self.test_key}:item:1')
        assert result == 1, "First VADD with REDUCE should return 1"

        # Check VINFO returns the correct projection information
        info = self.redis.execute_command('VINFO', self.test_key)
        info_map = {k.decode('utf-8'): v for k, v in zip(info[::2], info[1::2])}
        assert 'vector-dim' in info_map, "VINFO should contain vector-dim"
        assert info_map['vector-dim'] == reduced_dim, f"Expected reduced dimension {reduced_dim}, got {info_map['vector-dim']}"
        assert 'projection-input-dim' in info_map, "VINFO should contain projection-input-dim"
        assert info_map['projection-input-dim'] == original_dim, f"Expected original dimension {original_dim}, got {info_map['projection-input-dim']}"

        # Test scenario 2: Try adding a mismatched vector - should fail
        wrong_dim = 80
        wrong_vec = generate_random_vector(wrong_dim)
        wrong_vec_bytes = struct.pack(f'{wrong_dim}f', *wrong_vec)

        # This should fail with dimension mismatch error
        try:
            self.redis.execute_command('VADD', self.test_key,
                                       'REDUCE', reduced_dim,
                                       'FP32', wrong_vec_bytes, f'{self.test_key}:item:2')
            assert False, "VADD with wrong dimension should fail"
        except redis.exceptions.ResponseError as e:
            assert "Input dimension mismatch for projection" in str(e), f"Expected dimension mismatch error, got: {e}"

        # Test scenario 3: Add a correctly-sized vector
        vec2 = generate_random_vector(original_dim)
        vec2_bytes = struct.pack(f'{original_dim}f', *vec2)

        # This should succeed
        result = self.redis.execute_command('VADD', self.test_key,
                                            'REDUCE', reduced_dim,
                                            'FP32', vec2_bytes, f'{self.test_key}:item:3')
        assert result == 1, "VADD with correct dimensions should succeed"

        # Check VSIM also validates input dimensions
        wrong_query = generate_random_vector(wrong_dim)
        try:
            self.redis.execute_command('VSIM', self.test_key,
                                       'VALUES', wrong_dim, *[str(x) for x in wrong_query],
                                       'COUNT', 10)
            assert False, "VSIM with wrong dimension should fail"
        except redis.exceptions.ResponseError as e:
            assert "Input dimension mismatch for projection" in str(e), f"Expected dimension mismatch error in VSIM, got: {e}"

@ -0,0 +1,27 @@

from test import TestCase, generate_random_vector
import struct

class VREM_LastItemDeletesKey(TestCase):
    def getname(self):
        return "VREM last item deletes key"

    def test(self):
        # Generate a random vector
        vec = generate_random_vector(4)
        vec_bytes = struct.pack('4f', *vec)

        # Add the vector to the key
        result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')
        assert result == 1, "VADD should return 1 for first item"

        # Verify the key exists
        exists = self.redis.exists(self.test_key)
        assert exists == 1, "Key should exist after VADD"

        # Remove the item
        result = self.redis.execute_command('VREM', self.test_key, f'{self.test_key}:item:1')
        assert result == 1, "VREM should return 1 for successful removal"

        # Verify the key no longer exists
        exists = self.redis.exists(self.test_key)
        assert exists == 0, "Key should no longer exist after VREM of last item"

@ -0,0 +1,177 @@

from test import TestCase
|
||||
|
||||
class VSIMFilterExpressions(TestCase):
|
||||
def getname(self):
|
||||
return "VSIM FILTER expressions basic functionality"
|
||||
|
||||
def test(self):
|
||||
# Create a small set of vectors with different attributes
|
||||
|
||||
# Basic vectors for testing - all orthogonal for clear results
|
||||
vec1 = [1, 0, 0, 0]
|
||||
vec2 = [0, 1, 0, 0]
|
||||
vec3 = [0, 0, 1, 0]
|
||||
vec4 = [0, 0, 0, 1]
|
||||
vec5 = [0.5, 0.5, 0, 0]
|
||||
|
||||
# Add vectors with various attributes
|
||||
self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec1], f'{self.test_key}:item:1')
|
||||
self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:1',
|
||||
'{"age": 25, "name": "Alice", "active": true, "scores": [85, 90, 95], "city": "New York"}')
|
||||
|
||||
self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec2], f'{self.test_key}:item:2')
|
||||
self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:2',
|
||||
'{"age": 30, "name": "Bob", "active": false, "scores": [70, 75, 80], "city": "Boston"}')
|
||||
|
||||
self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec3], f'{self.test_key}:item:3')
|
||||
self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:3',
|
||||
'{"age": 35, "name": "Charlie", "scores": [60, 65, 70], "city": "Seattle"}')
|
||||
|
||||
self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec4], f'{self.test_key}:item:4')
|
||||
# Item 4 has no attribute at all
|
||||
|
||||
self.redis.execute_command('VADD', self.test_key, 'VALUES', 4,
|
||||
*[str(x) for x in vec5], f'{self.test_key}:item:5')
|
||||
self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:5',
|
||||
'invalid json') # Intentionally malformed JSON
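        # Fixture summary (derived from the VADD/VSETATTR calls above):
        #   item:1  age=25  name=Alice    active=true   city="New York"  scores=[85, 90, 95]
        #   item:2  age=30  name=Bob      active=false  city="Boston"    scores=[70, 75, 80]
        #   item:3  age=35  name=Charlie  (no active)   city="Seattle"   scores=[60, 65, 70]
        #   item:4  no attributes at all
        #   item:5  malformed JSON (should never match any FILTER)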

        # Test 1: Basic equality with numbers
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.age == 25')
        assert len(result) == 1, "Expected 1 result for age == 25"
        assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for age == 25"

        # Test 2: Greater than
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.age > 25')
        assert len(result) == 2, "Expected 2 results for age > 25"

        # Test 3: Less than or equal
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.age <= 30')
        assert len(result) == 2, "Expected 2 results for age <= 30"

        # Test 4: String equality
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.name == "Alice"')
        assert len(result) == 1, "Expected 1 result for name == Alice"
        assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for name == Alice"

        # Test 5: String inequality
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.name != "Alice"')
        assert len(result) == 2, "Expected 2 results for name != Alice"

        # Test 6: Boolean value
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.active')
        assert len(result) == 1, "Expected 1 result for .active being true"

        # Test 7: Logical AND
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.age > 20 and .age < 30')
        assert len(result) == 1, "Expected 1 result for 20 < age < 30"
        assert result[0].decode() == f'{self.test_key}:item:1', "Expected item:1 for 20 < age < 30"

        # Test 8: Logical OR
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.age < 30 or .age > 35')
        assert len(result) == 1, "Expected 1 result for age < 30 or age > 35"

        # Test 9: Logical NOT
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '!(.age == 25)')
        assert len(result) == 2, "Expected 2 results for NOT(age == 25)"

        # Test 10: The "in" operator with an array
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.age in [25, 35]')
        assert len(result) == 2, "Expected 2 results for age in [25, 35]"

        # Test 11: The "in" operator with strings in an array
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.name in ["Alice", "David"]')
        assert len(result) == 1, "Expected 1 result for name in [Alice, David]"

        # Test 12: Arithmetic operations - addition
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.age + 10 > 40')
        assert len(result) == 1, "Expected 1 result for age + 10 > 40"

        # Test 13: Arithmetic operations - multiplication
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.age * 2 > 60')
        assert len(result) == 1, "Expected 1 result for age * 2 > 60"

        # Test 14: Arithmetic operations - division
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.age / 5 == 5')
        assert len(result) == 1, "Expected 1 result for age / 5 == 5"

        # Test 15: Arithmetic operations - modulo
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.age % 2 == 0')
        assert len(result) == 1, "Expected 1 result for age % 2 == 0"

        # Test 16: Power operator
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.age ** 2 > 900')
        assert len(result) == 1, "Expected 1 result for age^2 > 900"

        # Test 17: Missing attribute (should exclude items missing that attribute)
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.missing_field == "value"')
        assert len(result) == 0, "Expected 0 results for missing_field == value"

        # Test 18: No attribute set at all
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.any_field')
        assert f'{self.test_key}:item:4' not in [item.decode() for item in result], "Item with no attribute should be excluded"

        # Test 19: Malformed JSON
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.any_field')
        assert f'{self.test_key}:item:5' not in [item.decode() for item in result], "Item with malformed JSON should be excluded"

        # Test 20: Complex expression combining multiple operators
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '(.age > 20 and .age < 40) and (.city == "Boston" or .city == "New York")')
        assert len(result) == 2, "Expected 2 results for the complex expression"
        expected_items = [f'{self.test_key}:item:1', f'{self.test_key}:item:2']
        assert set(item.decode() for item in result) == set(expected_items), "Expected item:1 and item:2 for the complex expression"

        # Test 21: Parentheses to control operator precedence
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.age > (20 + 10)')
        assert len(result) == 1, "Expected 1 result for age > (20 + 10)"

        # Test 22: Array access (arrays evaluate to true)
        result = self.redis.execute_command('VSIM', self.test_key, 'VALUES', 4,
                                            *[str(x) for x in vec1],
                                            'FILTER', '.scores')
        assert len(result) == 3, "Expected 3 results for .scores (arrays evaluate to true)"
@ -0,0 +1,668 @@
from test import TestCase, generate_random_vector
import struct
import random
import math
import json
import time
import re

class VSIMFilterAdvanced(TestCase):
    def getname(self):
        return "VSIM FILTER comprehensive functionality testing"

    def estimated_runtime(self):
        return 15  # This test might take up to 15 seconds for the large dataset

    def setup(self):
        super().setup()
        self.dim = 32  # Vector dimension
        self.count = 5000  # Number of vectors for large tests
        self.small_count = 50  # Number of vectors for small/quick tests

        # Categories for attributes
        self.categories = ["electronics", "furniture", "clothing", "books", "food"]
        self.cities = ["New York", "London", "Tokyo", "Paris", "Berlin", "Sydney", "Toronto", "Singapore"]
        self.price_ranges = [(10, 50), (50, 200), (200, 1000), (1000, 5000)]
        self.years = list(range(2000, 2025))

    def create_attributes(self, index):
        """Create realistic attributes for a vector"""
        category = random.choice(self.categories)
        city = random.choice(self.cities)
        min_price, max_price = random.choice(self.price_ranges)
        price = round(random.uniform(min_price, max_price), 2)
        year = random.choice(self.years)
        in_stock = random.random() > 0.3  # 70% chance of being in stock
        rating = round(random.uniform(1, 5), 1)
        views = int(random.expovariate(1/1000))  # Exponential distribution for page views
        tags = random.sample(["popular", "sale", "new", "limited", "exclusive", "clearance"],
                             k=random.randint(0, 3))

        # Add some specific patterns for testing:
        # every 10th item has a specific property combination.
        is_premium = (index % 10 == 0)

        # Create attributes dictionary
        attrs = {
            "id": index,
            "category": category,
            "location": city,
            "price": price,
            "year": year,
            "in_stock": in_stock,
            "rating": rating,
            "views": views,
            "tags": tags
        }

        if is_premium:
            attrs["is_premium"] = True
            attrs["special_features"] = ["premium", "warranty", "support"]

        # Add sub-categories for more complex filters
        if category == "electronics":
            attrs["subcategory"] = random.choice(["phones", "computers", "cameras", "audio"])
        elif category == "furniture":
            attrs["subcategory"] = random.choice(["chairs", "tables", "sofas", "beds"])
        elif category == "clothing":
            attrs["subcategory"] = random.choice(["shirts", "pants", "dresses", "shoes"])

        # Add some intentionally missing fields for testing
        if random.random() > 0.9:  # 10% chance of missing price
            del attrs["price"]

        # Some items have a promotion field
        if random.random() > 0.7:  # 30% chance of having a promotion
            attrs["promotion"] = random.choice(["discount", "bundle", "gift"])

        # Return invalid JSON for a small percentage of vectors
        if random.random() > 0.98:  # 2% chance of invalid JSON
            return "{{invalid json}}"

        return json.dumps(attrs)
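    # Illustrative only -- one shape of document that create_attributes() can
    # emit (field values are random per run; this exact record is an
    # assumption, not captured output):
    #
    #   {"id": 7, "category": "electronics", "location": "Tokyo",
    #    "price": 129.99, "year": 2014, "in_stock": true, "rating": 4.2,
    #    "views": 523, "tags": ["sale"], "subcategory": "phones"}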

    def create_vectors_with_attributes(self, key, count):
        """Create vectors and add attributes to them"""
        vectors = []
        names = []
        attribute_map = {}  # To store attributes for verification

        # Create vectors
        for i in range(count):
            vec = generate_random_vector(self.dim)
            vectors.append(vec)
            name = f"{key}:item:{i}"
            names.append(name)

            # Add to Redis
            vec_bytes = struct.pack(f'{self.dim}f', *vec)
            self.redis.execute_command('VADD', key, 'FP32', vec_bytes, name)

            # Create and add attributes
            attrs = self.create_attributes(i)
            self.redis.execute_command('VSETATTR', key, name, attrs)

            # Store attributes for later verification
            try:
                attribute_map[name] = json.loads(attrs) if '{' in attrs else None
            except json.JSONDecodeError:
                attribute_map[name] = None

        return vectors, names, attribute_map

    def filter_linear_search(self, vectors, names, query_vector, filter_expr, attribute_map, k=10):
        """Perform a linear search with filtering for verification"""
        similarities = []
        query_norm = math.sqrt(sum(x*x for x in query_vector))

        if query_norm == 0:
            return []

        for i, vec in enumerate(vectors):
            name = names[i]
            attributes = attribute_map.get(name)

            # Skip if it doesn't match the filter
            if not self.matches_filter(attributes, filter_expr):
                continue

            vec_norm = math.sqrt(sum(x*x for x in vec))
            if vec_norm == 0:
                continue

            dot_product = sum(a*b for a, b in zip(query_vector, vec))
            cosine_sim = dot_product / (query_norm * vec_norm)
            distance = 1.0 - cosine_sim
            redis_similarity = 1.0 - (distance/2.0)
            similarities.append((name, redis_similarity))

        similarities.sort(key=lambda x: x[1], reverse=True)
        return similarities[:k]
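    # Score mapping used above, matching the similarity VSIM reports:
    #   cosine distance d = 1 - cos(q, v) lies in [0, 2]
    #   similarity      s = 1 - d/2      lies in [0, 1]
    # Worked values: cos = 1 -> s = 1.0 (identical direction),
    # cos = 0 -> s = 0.5 (orthogonal), cos = -1 -> s = 0.0 (opposite).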

    def matches_filter(self, attributes, filter_expr):
        """Filter matching for verification - uses Python eval to handle complex expressions"""
        if attributes is None:
            return False  # No attributes or invalid JSON

        # Replace JSON path selectors with Python dictionary access
        py_expr = filter_expr

        # Handle `.field` notation (replace with attributes.get('field'))
        i = 0
        while i < len(py_expr):
            if py_expr[i] == '.' and (i == 0 or not py_expr[i-1].isalnum()):
                # Find the end of the selector (stops at operators or whitespace)
                j = i + 1
                while j < len(py_expr) and (py_expr[j].isalnum() or py_expr[j] == '_'):
                    j += 1

                if j > i + 1:  # Found a valid selector
                    field = py_expr[i+1:j]
                    # Use a safe access pattern that returns None for missing fields
                    replacement = f"attributes.get('{field}')"
                    py_expr = py_expr[:i] + replacement + py_expr[j:]
                    i += len(replacement)
                else:
                    i += 1
            else:
                i += 1

        # Convert the `!` (not) operator, taking care not to rewrite `!=`
        py_expr = re.sub(r'!(?!=)', ' not ', py_expr)

        try:
            # Evaluate the translated expression. Missing fields surface as
            # None, and comparing None with numbers or strings raises
            # TypeError, so the whole expression fails -- which matches the
            # intended semantics: items missing a field don't match.
            result = eval(py_expr, {"attributes": attributes})
            return bool(result)
        except (TypeError, AttributeError):
            return False
        except Exception as e:
            print(f"Error evaluating filter expression '{filter_expr}' as '{py_expr}': {e}")
            return False
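    # Illustrative translation performed above (derived from the loop, not
    # captured output): the filter
    #     .age > 25 and .in_stock
    # is evaluated as the Python expression
    #     attributes.get('age') > 25 and attributes.get('in_stock')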

    def safe_decode(self, item):
        return item.decode() if isinstance(item, bytes) else item

    def calculate_recall(self, redis_results, linear_results, k=10):
        """Calculate recall (fraction of the ground-truth results retrieved)"""
        redis_set = set(self.safe_decode(item) for item in redis_results)
        linear_set = set(item[0] for item in linear_results[:k])

        if not linear_set:
            return 1.0  # If no linear results, consider it perfect recall

        intersection = redis_set.intersection(linear_set)
        return len(intersection) / len(linear_set)
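    # Worked example (hypothetical values): if the ground-truth top-k is
    # {a, b, d} and Redis returned {a, b, c}, the intersection is {a, b},
    # so recall = 2 / 3 ~= 0.67.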

    def test_recall_with_filter(self, filter_expr, ef=500, filter_ef=None):
        """Test recall for a given filter expression"""
        # Create query vector
        query_vec = generate_random_vector(self.dim)

        # First, get ground truth using a linear scan
        linear_results = self.filter_linear_search(
            self.vectors, self.names, query_vec, filter_expr, self.attribute_map, k=50)

        # Calculate "true" selectivity from the ground truth (note: both this
        # count and the Redis count below are capped at COUNT=50, so the two
        # figures are comparable with each other rather than being true
        # dataset-wide selectivities)
        true_selectivity = len(linear_results) / len(self.names) if self.names else 0

        # Perform the Redis search with the filter
        cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
        cmd_args.extend([str(x) for x in query_vec])
        cmd_args.extend(['COUNT', 50, 'WITHSCORES', 'EF', ef, 'FILTER', filter_expr])
        if filter_ef:
            cmd_args.extend(['FILTER-EF', filter_ef])

        start_time = time.time()
        redis_results = self.redis.execute_command(*cmd_args)
        query_time = time.time() - start_time

        # Convert Redis results to dict
        redis_items = {}
        for i in range(0, len(redis_results), 2):
            key = self.safe_decode(redis_results[i])
            score = float(redis_results[i+1])
            redis_items[key] = score

        # Calculate metrics
        recall = self.calculate_recall(redis_items.keys(), linear_results)
        selectivity = len(redis_items) / len(self.names) if redis_items else 0

        # Compare against the true selectivity from the linear scan
        assert abs(selectivity - true_selectivity) < 0.1, \
            f"Redis selectivity {selectivity:.3f} differs significantly from ground truth {true_selectivity:.3f}"

        # We expect high recall for standard parameters
        if ef >= 500 and (filter_ef is None or filter_ef >= 1000):
            try:
                assert recall >= 0.7, \
                    f"Low recall {recall:.2f} for filter '{filter_expr}'"
            except AssertionError as e:
                # Compare the items found in each set
                redis_items_set = set(redis_items.keys())
                linear_items_set = set(item[0] for item in linear_results)

                only_in_redis = redis_items_set - linear_items_set
                only_in_linear = linear_items_set - redis_items_set
                in_both = redis_items_set & linear_items_set

                # Build a comprehensive debug message
                debug = f"\nGround Truth: {len(linear_results)} matching items (total vectors: {len(self.vectors)})"
                debug += f"\nRedis Found: {len(redis_items)} items with FILTER-EF: {filter_ef or 'default'}"
                debug += f"\nItems in both sets: {len(in_both)} (recall: {recall:.4f})"
                debug += f"\nItems only in Redis: {len(only_in_redis)}"
                debug += f"\nItems only in Ground Truth: {len(only_in_linear)}"

                # Show some example items from each set with their scores
                if only_in_redis:
                    debug += "\n\nTOP 5 ITEMS ONLY IN REDIS:"
                    sorted_redis = sorted(redis_items.items(), key=lambda x: x[1], reverse=True)
                    for i, (item, score) in enumerate(sorted_redis[:5]):
                        if item in only_in_redis:
                            debug += f"\n  {i+1}. {item} (Score: {score:.4f})"

                            # Show attributes that should match the filter
                            attr = self.attribute_map.get(item)
                            if attr:
                                debug += f" - Attrs: {attr.get('category', 'N/A')}, Price: {attr.get('price', 'N/A')}"

                if only_in_linear:
                    debug += "\n\nTOP 5 ITEMS ONLY IN GROUND TRUTH:"
                    for i, (item, score) in enumerate(linear_results[:5]):
                        if item in only_in_linear:
                            debug += f"\n  {i+1}. {item} (Score: {score:.4f})"

                            # Show attributes that should match the filter
                            attr = self.attribute_map.get(item)
                            if attr:
                                debug += f" - Attrs: {attr.get('category', 'N/A')}, Price: {attr.get('price', 'N/A')}"

                # Help identify parsing issues
                debug += "\n\nPARSING CHECK:"
                debug += f"\nRedis command: VSIM {self.test_key} VALUES {self.dim} [...] FILTER '{filter_expr}'"

                # Check for WITHSCORES handling issues
                if len(redis_results) > 0 and len(redis_results) % 2 == 0:
                    debug += f"\nRedis returned {len(redis_results)} items (looks like item,score pairs)"
                    debug += f"\nFirst few results: {redis_results[:4]}"

                # Check the filter implementation
                debug += "\n\nFILTER IMPLEMENTATION CHECK:"
                debug += f"\nFilter expression: '{filter_expr}'"
                debug += "\nSample attribute matches from attribute_map:"
                count_matching = 0
                for i, (name, attrs) in enumerate(self.attribute_map.items()):
                    if attrs and self.matches_filter(attrs, filter_expr):
                        count_matching += 1
                        if i < 3:  # Show first 3 matches
                            debug += f"\n  - {name}: {attrs}"
                debug += f"\nTotal items matching filter in attribute_map: {count_matching}"

                # Check if the results array handling could be wrong
                debug += "\n\nRESULT ARRAYS CHECK:"
                if len(linear_results) >= 1:
                    debug += f"\nlinear_results[0]: {linear_results[0]}"
                    if isinstance(linear_results[0], tuple) and len(linear_results[0]) == 2:
                        debug += " (correct tuple format: (name, score))"
                    else:
                        debug += " (UNEXPECTED FORMAT!)"

                # Debug sort order
                debug += "\n\nSORTING CHECK:"
                if len(linear_results) >= 2:
                    debug += f"\nGround truth first item score: {linear_results[0][1]}"
                    debug += f"\nGround truth second item score: {linear_results[1][1]}"
                    debug += f"\nCorrectly sorted by similarity? {linear_results[0][1] >= linear_results[1][1]}"

                # Re-raise with detailed information
                raise AssertionError(str(e) + debug)

        return recall, selectivity, query_time, len(redis_items)

    def test(self):
        print("\nRunning comprehensive VSIM FILTER tests...")

        # Create a larger dataset for testing
        print(f"Creating dataset with {self.count} vectors and attributes...")
        self.vectors, self.names, self.attribute_map = self.create_vectors_with_attributes(
            self.test_key, self.count)

        # ==== 1. Recall and Precision Testing ====
        print("Testing recall for various filters...")

        # Test basic filters with different selectivity
        results = {}
        results["category"] = self.test_recall_with_filter('.category == "electronics"')
        results["price_high"] = self.test_recall_with_filter('.price > 1000')
        results["in_stock"] = self.test_recall_with_filter('.in_stock')
        results["rating"] = self.test_recall_with_filter('.rating >= 4')
        results["complex1"] = self.test_recall_with_filter('.category == "electronics" and .price < 500')

        print("Filter | Recall | Selectivity | Time (ms) | Results")
        print("----------------------------------------------------")
        for name, (recall, selectivity, elapsed, count) in results.items():
            print(f"{name:7} | {recall:.3f} | {selectivity:.3f} | {elapsed*1000:.1f} | {count}")

        # ==== 2. Filter Selectivity Performance ====
        print("\nTesting filter selectivity performance...")

        # High selectivity (very few matches)
        high_sel_recall, _, high_sel_time, _ = self.test_recall_with_filter('.is_premium')

        # Medium selectivity
        med_sel_recall, _, med_sel_time, _ = self.test_recall_with_filter('.price > 100 and .price < 1000')

        # Low selectivity (many matches)
        low_sel_recall, _, low_sel_time, _ = self.test_recall_with_filter('.year > 2000')

        print(f"High selectivity recall: {high_sel_recall:.3f}, time: {high_sel_time*1000:.1f}ms")
        print(f"Med selectivity recall: {med_sel_recall:.3f}, time: {med_sel_time*1000:.1f}ms")
        print(f"Low selectivity recall: {low_sel_recall:.3f}, time: {low_sel_time*1000:.1f}ms")

        # ==== 3. FILTER-EF Parameter Testing ====
        print("\nTesting FILTER-EF parameter...")

        # Test with different FILTER-EF values
        filter_expr = '.category == "electronics" and .price > 200'
        ef_values = [100, 500, 2000, 5000]

        print("FILTER-EF | Recall | Time (ms)")
        print("-----------------------------")
        for filter_ef in ef_values:
            recall, _, query_time, _ = self.test_recall_with_filter(
                filter_expr, ef=500, filter_ef=filter_ef)
            print(f"{filter_ef:9} | {recall:.3f} | {query_time*1000:.1f}")

        # Assert that higher FILTER-EF generally gives better recall
        low_ef_recall, _, _, _ = self.test_recall_with_filter(filter_expr, filter_ef=100)
        high_ef_recall, _, _, _ = self.test_recall_with_filter(filter_expr, filter_ef=5000)

        # This might not always be true due to randomness, but generally holds.
        # We use a softer assertion to avoid flaky tests.
        assert high_ef_recall >= low_ef_recall * 0.8, \
            f"Higher FILTER-EF should generally give better recall: {high_ef_recall:.3f} vs {low_ef_recall:.3f}"

        # ==== 4. Complex Filter Expressions ====
        print("\nTesting complex filter expressions...")

        # Test a variety of complex expressions
        complex_filters = [
            '.price > 100 and (.category == "electronics" or .category == "furniture")',
            '(.rating > 4 and .in_stock) or (.price < 50 and .views > 1000)',
            '.category in ["electronics", "clothing"] and .price > 200 and .rating >= 3',
            '(.category == "electronics" and .subcategory == "phones") or (.category == "furniture" and .price > 1000)',
            '.year > 2010 and !(.price < 100) and .in_stock'
        ]

        print("Expression | Results | Time (ms)")
        print("-----------------------------")
        for i, expr in enumerate(complex_filters):
            try:
                _, _, query_time, result_count = self.test_recall_with_filter(expr)
                print(f"Complex {i+1} | {result_count:7} | {query_time*1000:.1f}")
            except Exception as e:
                print(f"Complex {i+1} | Error: {str(e)}")

        # ==== 5. Attribute Type Testing ====
        print("\nTesting different attribute types...")

        type_filters = [
            ('.price > 500', "Numeric"),
            ('.category == "books"', "String equality"),
            ('.in_stock', "Boolean"),
            ('.tags in ["sale", "new"]', "Array membership"),
            ('.rating * 2 > 8', "Arithmetic")
        ]

        for expr, type_name in type_filters:
            try:
                _, _, query_time, result_count = self.test_recall_with_filter(expr)
                print(f"{type_name:16} | {expr:30} | {result_count:5} results | {query_time*1000:.1f}ms")
            except Exception as e:
                print(f"{type_name:16} | {expr:30} | Error: {str(e)}")

        # ==== 6. Filter + Count Interaction ====
        print("\nTesting COUNT parameter with filters...")

        filter_expr = '.category == "electronics"'
        counts = [5, 20, 100]

        for count in counts:
            query_vec = generate_random_vector(self.dim)
            cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
            cmd_args.extend([str(x) for x in query_vec])
            cmd_args.extend(['COUNT', count, 'WITHSCORES', 'FILTER', filter_expr])

            results = self.redis.execute_command(*cmd_args)
            result_count = len(results) // 2  # WITHSCORES returns item,score pairs

            # We expect the result count to be at most the requested count
            assert result_count <= count, f"Got {result_count} results with COUNT {count}"
            print(f"COUNT {count:3} | Got {result_count:3} results")

        # ==== 7. Edge Cases ====
        print("\nTesting edge cases...")

        # Test with no matching items
        no_match_expr = '.category == "nonexistent_category"'
        results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
                                             *[str(x) for x in generate_random_vector(self.dim)],
                                             'FILTER', no_match_expr)
        assert len(results) == 0, f"Expected 0 results for non-matching filter, got {len(results)}"
        print(f"No matching items: {len(results)} results (expected 0)")

        # Test with invalid filter syntax. The assert cannot live inside the
        # try block, or its own AssertionError would be swallowed by except.
        got_error = False
        try:
            self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
                                       *[str(x) for x in generate_random_vector(self.dim)],
                                       'FILTER', '.category === "books"')  # Triple equals is invalid
        except Exception:
            got_error = True
            print("Invalid filter syntax correctly raised an error")
        assert got_error, "Expected error for invalid filter syntax"

        # Test with an extremely long complex expression
        long_expr = ' and '.join([f'.rating > {i/10}' for i in range(10)])
        try:
            results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', self.dim,
                                                 *[str(x) for x in generate_random_vector(self.dim)],
                                                 'FILTER', long_expr)
            print(f"Long expression: {len(results)} results")
        except Exception as e:
            print(f"Long expression error: {str(e)}")

        print("\nComprehensive VSIM FILTER tests completed successfully")


class VSIMFilterSelectivityTest(TestCase):
    def getname(self):
        return "VSIM FILTER selectivity performance benchmark"

    def estimated_runtime(self):
        return 8  # This test might take up to 8 seconds

    def setup(self):
        super().setup()
        self.dim = 32
        self.count = 10000
        self.test_key = f"{self.test_key}:selectivity"  # Use a different key

    def create_vector_with_age_attribute(self, name, age):
        """Create a vector with a specific age attribute"""
        vec = generate_random_vector(self.dim)
        vec_bytes = struct.pack(f'{self.dim}f', *vec)
        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name)
        self.redis.execute_command('VSETATTR', self.test_key, name, json.dumps({"age": age}))

    def test(self):
        print("\nRunning VSIM FILTER selectivity benchmark...")

        # Create a dataset where we control the exact selectivity
        print(f"Creating controlled dataset with {self.count} vectors...")

        # Create vectors with age attributes from 1 to 100
        for i in range(self.count):
            age = (i % 100) + 1  # Ages from 1 to 100
            name = f"{self.test_key}:item:{i}"
            self.create_vector_with_age_attribute(name, age)

        # Create a query vector
        query_vec = generate_random_vector(self.dim)

        # Test filters with different selectivities
        selectivities = [0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.99]

        print("\nSelectivity | Filter | Results | Time (ms)")
        print("--------------------------------------------------")

        for target_selectivity in selectivities:
            # Calculate the age threshold for the desired selectivity:
            # ages are uniform over 1..100, so `.age <= N` matches N% of items
            age_threshold = int(target_selectivity * 100)
            filter_expr = f'.age <= {age_threshold}'

            # Run the query and measure time
            start_time = time.time()
            cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
            cmd_args.extend([str(x) for x in query_vec])
            cmd_args.extend(['COUNT', 100, 'FILTER', filter_expr])

            results = self.redis.execute_command(*cmd_args)
            query_time = time.time() - start_time

            print(f"{target_selectivity:.2f} | {filter_expr:15} | {len(results):7} | {query_time*1000:.1f}")

            # Ensure reasonable behavior across different selectivities.
            if target_selectivity <= 0.05:
                # For very selective queries, ensure we can find some results
                assert len(results) > 0, f"No results found for {filter_expr}"
            else:
                # For less selective queries, performance should be reasonable
                assert query_time < 1.0, f"Query too slow: {query_time:.3f}s for {filter_expr}"

        print("\nSelectivity benchmark completed successfully")


class VSIMFilterComparisonTest(TestCase):
    def getname(self):
        return "VSIM FILTER EF parameter comparison"

    def estimated_runtime(self):
        return 8  # This test might take up to 8 seconds

    def setup(self):
        super().setup()
        self.dim = 32
        self.count = 5000
        self.test_key = f"{self.test_key}:efparams"  # Use a different key

    def create_dataset(self):
        """Create a dataset with specific attribute patterns for testing FILTER-EF"""
        vectors = []
        names = []

        # Create vectors with category and quality score attributes
        for i in range(self.count):
            vec = generate_random_vector(self.dim)
            name = f"{self.test_key}:item:{i}"

            # Add vector to Redis
            vec_bytes = struct.pack(f'{self.dim}f', *vec)
            self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, name)

            # Create attributes - we want a very selective filter: roughly 1%
            # of items have category=premium AND quality>90 (10% premium
            # times 10% with quality in 91..100)
            category = "premium" if random.random() < 0.1 else random.choice(["standard", "economy", "basic"])
            quality = random.randint(1, 100)

            attrs = {
                "id": i,
                "category": category,
                "quality": quality
            }

            self.redis.execute_command('VSETATTR', self.test_key, name, json.dumps(attrs))
            vectors.append(vec)
            names.append(name)

        return vectors, names

    def test(self):
        print("\nRunning VSIM FILTER-EF parameter comparison...")

        # Create dataset
        vectors, names = self.create_dataset()

        # Create a selective filter that matches roughly 1% of items
        filter_expr = '.category == "premium" and .quality > 90'

        # Create query vector
        query_vec = generate_random_vector(self.dim)

        # Test different FILTER-EF values
        ef_values = [50, 100, 500, 1000, 5000]
        results = []

        print("\nFILTER-EF | Results | Time (ms) | Notes")
        print("---------------------------------------")

        baseline_count = None

        for ef in ef_values:
            # Run the query and measure time
            start_time = time.time()
            cmd_args = ['VSIM', self.test_key, 'VALUES', self.dim]
            cmd_args.extend([str(x) for x in query_vec])
            cmd_args.extend(['COUNT', 100, 'FILTER', filter_expr, 'FILTER-EF', ef])

            query_results = self.redis.execute_command(*cmd_args)
            query_time = time.time() - start_time

            # The first run (lowest EF) is the baseline the other result
            # counts are compared against
            if baseline_count is None:
                baseline_count = len(query_results)

            recall_rate = len(query_results) / max(1, baseline_count) if baseline_count > 0 else 1.0

            notes = ""
            if ef == ef_values[0]:
                notes = "Baseline"
            elif recall_rate < 0.5:
                notes = "Low recall!"

            print(f"{ef:9} | {len(query_results):7} | {query_time*1000:.1f} | {notes}")
            results.append((ef, len(query_results), query_time))

        # If we have enough results at the highest EF, check that result
        # counts don't degrade as EF grows
        if results[-1][1] >= 5:  # At least 5 results for the highest EF
            # Extract result counts
            result_counts = [r[1] for r in results]

            # The last run (highest EF) should typically find at least as many
            # results as the first (lowest EF); we use a soft assertion to
            # avoid flaky tests
            assert result_counts[-1] >= result_counts[0], \
                f"Higher FILTER-EF should find at least as many results: {result_counts[-1]} vs {result_counts[0]}"

        print("\nFILTER-EF parameter comparison completed successfully")
@ -0,0 +1,56 @@
from test import TestCase, fill_redis_with_vectors, generate_random_vector
import random

class LargeScale(TestCase):
    def getname(self):
        return "Large Scale Comparison"

    def estimated_runtime(self):
        return 10

    def test(self):
        dim = 300
        count = 20000
        k = 50

        # Fill Redis and get reference data for comparison
        random.seed(42)  # Make the test deterministic
        data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)

        # Generate query vector
        query_vec = generate_random_vector(dim)

        # Get results from Redis with a good exploration factor
        redis_raw = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
                                               *[str(x) for x in query_vec],
                                               'COUNT', k, 'WITHSCORES', 'EF', 500)

        # Convert Redis results to dict
        redis_results = {}
        for i in range(0, len(redis_raw), 2):
            key = redis_raw[i].decode()
            score = float(redis_raw[i+1])
            redis_results[key] = score
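        # Reply shape assumed above: with WITHSCORES, VSIM returns a flat
        # array [name1, score1, name2, score2, ...], e.g.
        #   [b'key:item:42', b'0.9813', b'key:item:7', b'0.9642', ...]
        # hence the stride-2 loop (the example values are illustrative).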

        # Get results from linear scan
        linear_results = data.find_k_nearest(query_vec, k)
        linear_items = {name: score for name, score in linear_results}

        # Compare overlap
        redis_set = set(redis_results.keys())
        linear_set = set(linear_items.keys())
        overlap = len(redis_set & linear_set)

        # If the test is about to fail, print a comparison for debugging
        if overlap < k * 0.7:
            data.print_comparison({'items': redis_results, 'query_vector': query_vec}, k)

        assert overlap >= k * 0.7, \
            f"Expected at least 70% overlap in top {k} results, got {overlap/k*100:.1f}%"

        # Verify scores for common items
        for item in redis_set & linear_set:
            redis_score = redis_results[item]
            linear_score = linear_items[item]
            assert abs(redis_score - linear_score) < 0.01, \
                f"Score mismatch for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"
@ -0,0 +1,36 @@
from test import TestCase, generate_random_vector
import struct

class MemoryUsageTest(TestCase):
    def getname(self):
        return "[regression] MEMORY USAGE with attributes"

    def test(self):
        # Generate random vectors
        vec1 = generate_random_vector(4)
        vec2 = generate_random_vector(4)
        vec_bytes1 = struct.pack('4f', *vec1)
        vec_bytes2 = struct.pack('4f', *vec2)

        # Add vectors to the key, one with an attribute, one without
        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes1, f'{self.test_key}:item:1')
        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes2, f'{self.test_key}:item:2', 'SETATTR', '{"color":"red"}')

        # Get memory usage for the key
        try:
            memory_usage = self.redis.execute_command('MEMORY', 'USAGE', self.test_key)
            # If we got here without an exception, the command worked
            assert memory_usage > 0, "MEMORY USAGE should return a positive value"

            # Add more attributes to increase complexity
            self.redis.execute_command('VSETATTR', self.test_key, f'{self.test_key}:item:1', '{"color":"blue","size":10}')

            # Check memory usage again
            new_memory_usage = self.redis.execute_command('MEMORY', 'USAGE', self.test_key)
            assert new_memory_usage > 0, "MEMORY USAGE should still return a positive value after setting attributes"

            # Memory usage should be higher after adding attributes
            assert new_memory_usage > memory_usage, "Memory usage should increase after adding attributes"

        except Exception as e:
            raise AssertionError(f"MEMORY USAGE command failed: {str(e)}")
@ -0,0 +1,85 @@
from test import TestCase, generate_random_vector
import struct
import math
import random

class VectorUpdateAndClusters(TestCase):
    def getname(self):
        return "VADD vector update with cluster relocation"

    def estimated_runtime(self):
        return 2.0  # Should take around 2 seconds

    def generate_cluster_vector(self, base_vec, noise=0.1):
        """Generate a vector that's similar to base_vec with some noise."""
        vec = [x + random.gauss(0, noise) for x in base_vec]
        # Normalize
        norm = math.sqrt(sum(x*x for x in vec))
        return [x/norm for x in vec]
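    # Why normalize? With unit-norm vectors the dot product equals the cosine
    # similarity, so the similarity checks in test() reduce to plain dot
    # products. Worked example: [3, 4] has norm 5 and normalizes to [0.6, 0.8].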

    def test(self):
        dim = 128
        vectors_per_cluster = 5000

        # Create two very different base vectors for our clusters
        cluster1_base = generate_random_vector(dim)
        cluster2_base = [-x for x in cluster1_base]  # Opposite direction

        # Add vectors from the first cluster
        for i in range(vectors_per_cluster):
            vec = self.generate_cluster_vector(cluster1_base)
            vec_bytes = struct.pack(f'{dim}f', *vec)
            self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes,
                                       f'{self.test_key}:cluster1:{i}')

        # Add vectors from the second cluster
        for i in range(vectors_per_cluster):
            vec = self.generate_cluster_vector(cluster2_base)
            vec_bytes = struct.pack(f'{dim}f', *vec)
            self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes,
                                       f'{self.test_key}:cluster2:{i}')

        # Pick a test item from cluster1 (an element name inside the vector
        # set, not to be confused with the Redis key self.test_key)
        target_item = f'{self.test_key}:cluster1:0'

        # Verify it's in cluster1 using VSIM
        initial_vec = self.generate_cluster_vector(cluster1_base)
        results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
                                             *[str(x) for x in initial_vec],
                                             'COUNT', 100, 'WITHSCORES')

        # Count how many cluster1 items are in the top results
        cluster1_count = sum(1 for i in range(0, len(results), 2)
                             if b'cluster1' in results[i])
        assert cluster1_count > 80, "Initial clustering check failed"

        # Now update the test vector so it belongs to cluster2
        new_vec = self.generate_cluster_vector(cluster2_base, noise=0.05)
        vec_bytes = struct.pack(f'{dim}f', *new_vec)
        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, target_item)

        # Verify the embedding was actually updated using VEMB
        emb_result = self.redis.execute_command('VEMB', self.test_key, target_item)
        updated_vec = [float(x) for x in emb_result]

        # Verify the updated vector matches what we inserted
        dot_product = sum(a*b for a, b in zip(updated_vec, new_vec))
        similarity = dot_product / (math.sqrt(sum(x*x for x in updated_vec)) *
                                    math.sqrt(sum(x*x for x in new_vec)))
        assert similarity > 0.9, "Vector was not properly updated"

        # Verify it's now in cluster2 using VSIM
        results = self.redis.execute_command('VSIM', self.test_key, 'VALUES', dim,
                                             *[str(x) for x in cluster2_base],
                                             'COUNT', 100, 'WITHSCORES')

        # Verify our updated vector is among the top results
        found = False
        for i in range(0, len(results), 2):
            if results[i].decode() == target_item:
                found = True
                similarity = float(results[i+1])
                assert similarity > 0.80, f"Updated vector has low similarity: {similarity}"
                break

        assert found, "Updated vector not found in cluster2 proximity"
@ -0,0 +1,83 @@
from test import TestCase, fill_redis_with_vectors, generate_random_vector
import random

class HNSWPersistence(TestCase):
    def getname(self):
        return "HNSW Persistence"

    def estimated_runtime(self):
        return 30

    def _verify_results(self, key, dim, query_vec, reduced_dim=None):
        """Run a query and return a results dict"""
        k = 10
        # Queries are always expressed in the original dimension; with REDUCE
        # the server projects the query itself, so the arguments are the same
        # whether or not the set was created with dimension reduction.
        args = ['VSIM', key, 'VALUES', dim]
        args.extend([str(x) for x in query_vec])
        args.extend(['COUNT', k, 'WITHSCORES'])
        results = self.redis.execute_command(*args)

        results_dict = {}
        for i in range(0, len(results), 2):
            item = results[i].decode()
            score = float(results[i+1])
            results_dict[item] = score
        return results_dict

    def test(self):
        # Setup dimensions
        dim = 128
        reduced_dim = 32
        count = 5000
        random.seed(42)

        # Create two datasets - one normal and one with dimension reduction
        normal_data = fill_redis_with_vectors(self.redis, f"{self.test_key}:normal", count, dim)
        projected_data = fill_redis_with_vectors(self.redis, f"{self.test_key}:projected",
                                                 count, dim, reduced_dim)

        # Generate the query vectors we'll use before and after the reload
        query_vec_normal = generate_random_vector(dim)
        query_vec_projected = generate_random_vector(dim)

        # Get initial results for both sets
        initial_normal = self._verify_results(f"{self.test_key}:normal",
                                              dim, query_vec_normal)
        initial_projected = self._verify_results(f"{self.test_key}:projected",
                                                 dim, query_vec_projected, reduced_dim)

        # Force Redis to save and reload the dataset
        self.redis.execute_command('DEBUG', 'RELOAD')
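        # DEBUG RELOAD serializes the dataset to RDB and loads it back, so
        # everything checked below is state that survived a full save/restore
        # cycle, including the serialized index for both keys.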

        # Verify results after reload
        reloaded_normal = self._verify_results(f"{self.test_key}:normal",
                                               dim, query_vec_normal)
        reloaded_projected = self._verify_results(f"{self.test_key}:projected",
                                                  dim, query_vec_projected, reduced_dim)

        # Verify normal vectors results
        assert len(initial_normal) == len(reloaded_normal), \
            "Normal vectors: Result count mismatch before/after reload"

        for key in initial_normal:
            assert key in reloaded_normal, f"Normal vectors: Missing item after reload: {key}"
            assert abs(initial_normal[key] - reloaded_normal[key]) < 0.0001, \
                f"Normal vectors: Score mismatch for {key}: " + \
                f"before={initial_normal[key]:.6f}, after={reloaded_normal[key]:.6f}"

        # Verify projected vectors results
        assert len(initial_projected) == len(reloaded_projected), \
            "Projected vectors: Result count mismatch before/after reload"

        for key in initial_projected:
            assert key in reloaded_projected, \
                f"Projected vectors: Missing item after reload: {key}"
            assert abs(initial_projected[key] - reloaded_projected[key]) < 0.0001, \
                f"Projected vectors: Score mismatch for {key}: " + \
                f"before={initial_projected[key]:.6f}, after={reloaded_projected[key]:.6f}"
@ -0,0 +1,71 @@
from test import TestCase, fill_redis_with_vectors, generate_random_vector

class Reduce(TestCase):
    def getname(self):
        return "Dimension Reduction"

    def estimated_runtime(self):
        return 0.2

    def test(self):
        original_dim = 100
        reduced_dim = 80
        count = 1000
        k = 50  # Number of nearest neighbors to check

        # Fill Redis with vectors using REDUCE and get reference data
        data = fill_redis_with_vectors(self.redis, self.test_key, count, original_dim, reduced_dim)

        # Verify the dimension is reduced
        dim = self.redis.execute_command('VDIM', self.test_key)
        assert dim == reduced_dim, f"Expected dimension {reduced_dim}, got {dim}"

        # Generate a query vector and get nearest neighbors using Redis
        query_vec = generate_random_vector(original_dim)
        redis_raw = self.redis.execute_command('VSIM', self.test_key, 'VALUES',
                                               original_dim, *[str(x) for x in query_vec],
                                               'COUNT', k, 'WITHSCORES')

        # Convert Redis results to dict
        redis_results = {}
        for i in range(0, len(redis_raw), 2):
            key = redis_raw[i].decode()
            score = float(redis_raw[i+1])
            redis_results[key] = score

        # Get results from linear scan with the original vectors
        linear_results = data.find_k_nearest(query_vec, k)
        linear_items = {name: score for name, score in linear_results}

        # Compare overlap between reduced and non-reduced results
        redis_set = set(redis_results.keys())
        linear_set = set(linear_items.keys())
        overlap = len(redis_set & linear_set)
        overlap_ratio = overlap / k

        # With random projection we expect some loss of accuracy, but we
        # should maintain at least part of the similarity structure. Note
        # that the gaussian distribution is the worst case for this test, so
        # in real-world practice things will be better.
        min_expected_overlap = 0.1  # At least 10% overlap in top-k

        # If overlap is too low, print a comparison for debugging
        # before asserting
        if overlap_ratio < min_expected_overlap:
            print("\nLow overlap in results. Details:")
            print("\nTop results from linear scan (original vectors):")
            for name, score in linear_results:
                print(f"{name}: {score:.3f}")
            print("\nTop results from Redis (reduced vectors):")
            for item, score in sorted(redis_results.items(), key=lambda x: x[1], reverse=True):
                print(f"{item}: {score:.3f}")

        assert overlap_ratio >= min_expected_overlap, \
            f"Dimension reduction lost too much structure. Only {overlap_ratio*100:.1f}% overlap in top {k}"

        # For items that appear in both results, scores should be reasonably
        # correlated; allow for some deviation due to dimensionality reduction.
        common_items = redis_set & linear_set
        for item in common_items:
            redis_score = redis_results[item]
            linear_score = linear_items[item]
            assert abs(redis_score - linear_score) < 0.2, \
                f"Score mismatch too high for {item}: Redis={redis_score:.3f} Linear={linear_score:.3f}"
@ -0,0 +1,92 @@
from test import TestCase, generate_random_vector
import struct
import random
import time

class ComprehensiveReplicationTest(TestCase):
    def getname(self):
        return "Comprehensive Replication Test with mixed operations"

    def estimated_runtime(self):
        # This test will take longer than the default 100ms
        return 20.0  # 20 seconds estimate

    def test(self):
        # Setup replication between primary and replica
        assert self.setup_replication(), "Failed to setup replication"

        # Test parameters
        num_vectors = 5000
        vector_dim = 8
        delete_probability = 0.1
        cas_probability = 0.3

        # Keep track of added items for potential deletion
        added_items = []

        # Add vectors and occasionally delete
        for i in range(num_vectors):
            # Generate a random vector
            vec = generate_random_vector(vector_dim)
            vec_bytes = struct.pack(f'{vector_dim}f', *vec)
            item_name = f"{self.test_key}:item:{i}"

            # Decide whether to use CAS or not (CAS is a bare flag on VADD;
            # it takes no reference item)
            use_cas = random.random() < cas_probability

            if use_cas and added_items:
                try:
                    # Add with CAS
                    result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes,
                                                        item_name, 'CAS')
                    # Only add to our list if actually added (CAS might fail)
                    if result == 1:
                        added_items.append(item_name)
                except Exception as e:
                    print(f" CAS VADD failed: {e}")
            else:
                try:
                    # Add without CAS
                    result = self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, item_name)
                    # Only add to our list if actually added
                    if result == 1:
                        added_items.append(item_name)
                except Exception as e:
                    print(f" VADD failed: {e}")

            # Randomly delete items (with 10% probability)
            if random.random() < delete_probability and added_items:
                try:
                    # Select a random item to delete
                    item_to_delete = random.choice(added_items)
                    # Delete the item using VREM (not VDEL)
                    self.redis.execute_command('VREM', self.test_key, item_to_delete)
                    # Remove it from our list
                    added_items.remove(item_to_delete)
                except Exception as e:
                    print(f" VREM failed: {e}")

        # Allow time for replication to complete
        time.sleep(2.0)

        # Verify the final VCARD matches
        primary_card = self.redis.execute_command('VCARD', self.test_key)
        replica_card = self.replica.execute_command('VCARD', self.test_key)
        assert primary_card == replica_card, f"Final VCARD mismatch: primary={primary_card}, replica={replica_card}"

        # Verify VDIM matches
        primary_dim = self.redis.execute_command('VDIM', self.test_key)
        replica_dim = self.replica.execute_command('VDIM', self.test_key)
        assert primary_dim == replica_dim, f"VDIM mismatch: primary={primary_dim}, replica={replica_dim}"

        # Verify digests match using DEBUG DIGEST-VALUE
        primary_digest = self.redis.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
        replica_digest = self.replica.execute_command('DEBUG', 'DIGEST-VALUE', self.test_key)
        assert primary_digest == replica_digest, f"Digest mismatch: primary={primary_digest}, replica={replica_digest}"

        # Print summary
        print(f"\n Added and maintained {len(added_items)} vectors with dimension {vector_dim}")
        print(f" Final vector count: {primary_card}")
        print(f" Final digest: {primary_digest[0].decode()}")
@ -0,0 +1,98 @@
from test import TestCase, generate_random_vector
import threading
import struct
import math
import random
from typing import List, Dict

class ConcurrentCASTest(TestCase):
    def getname(self):
        return "Concurrent VADD with CAS"

    def estimated_runtime(self):
        return 1.5

    def worker(self, vectors: List[List[float]], start_idx: int, end_idx: int,
               dim: int, results: Dict[str, bool]):
        """Worker thread that adds a subset of vectors using VADD CAS"""
        for i in range(start_idx, end_idx):
            vec = vectors[i]
            name = f"{self.test_key}:item:{i}"
            vec_bytes = struct.pack(f'{dim}f', *vec)

            # Try to add the vector with CAS
            try:
                result = self.redis.execute_command('VADD', self.test_key, 'FP32',
                                                    vec_bytes, name, 'CAS')
                results[name] = (result == 1)  # Store whether it was actually added
            except Exception as e:
                results[name] = False
                print(f"Error adding {name}: {e}")

    def verify_vector_similarity(self, vec1: List[float], vec2: List[float]) -> float:
        """Calculate cosine similarity between two vectors"""
        dot_product = sum(a*b for a, b in zip(vec1, vec2))
        norm1 = math.sqrt(sum(x*x for x in vec1))
        norm2 = math.sqrt(sum(x*x for x in vec2))
        return dot_product / (norm1 * norm2) if norm1 > 0 and norm2 > 0 else 0

    def test(self):
        # Test parameters
        dim = 128
        total_vectors = 5000
        num_threads = 8
        vectors_per_thread = total_vectors // num_threads

        # Generate all vectors upfront
        random.seed(42)  # For reproducibility
        vectors = [generate_random_vector(dim) for _ in range(total_vectors)]

        # Prepare threads and results dictionary
        threads = []
        results = {}  # Will store success/failure for each vector

        # Launch threads
        for i in range(num_threads):
            start_idx = i * vectors_per_thread
            end_idx = start_idx + vectors_per_thread if i < num_threads-1 else total_vectors
            thread = threading.Thread(target=self.worker,
                                      args=(vectors, start_idx, end_idx, dim, results))
            threads.append(thread)
            thread.start()

        # Wait for all threads to complete
        for thread in threads:
            thread.join()

        # Verify cardinality
        card = self.redis.execute_command('VCARD', self.test_key)
        assert card == total_vectors, \
            f"Expected {total_vectors} elements, but found {card}"

        # Verify each vector
        num_verified = 0
        for i in range(total_vectors):
            name = f"{self.test_key}:item:{i}"

            # Verify the item was successfully added
            assert results[name], f"Vector {name} was not successfully added"

            # Get the stored vector
            stored_vec_raw = self.redis.execute_command('VEMB', self.test_key, name)
            stored_vec = [float(x) for x in stored_vec_raw]

            # Verify vector dimensions
            assert len(stored_vec) == dim, \
                f"Stored vector dimension mismatch for {name}: {len(stored_vec)} != {dim}"

            # Calculate similarity with the original vector
            similarity = self.verify_vector_similarity(vectors[i], stored_vec)
            assert similarity > 0.99, \
                f"Low similarity ({similarity}) for {name}"

            num_verified += 1

        # Final verification
        assert num_verified == total_vectors, \
            f"Only verified {num_verified} out of {total_vectors} vectors"
@ -0,0 +1,41 @@
from test import TestCase
import struct
import math

class VEMB(TestCase):
    def getname(self):
        return "VEMB Command"

    def test(self):
        dim = 4

        # Add the same vector in both formats.
        vec = [1, 0, 0, 0]
        norm = math.sqrt(sum(x*x for x in vec))
        vec = [x/norm for x in vec]  # Normalize the vector.

        # Add using FP32.
        vec_bytes = struct.pack(f'{dim}f', *vec)
        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes, f'{self.test_key}:item:1')

        # Add using VALUES.
        self.redis.execute_command('VADD', self.test_key, 'VALUES', dim,
                                   *[str(x) for x in vec], f'{self.test_key}:item:2')

        # Get both back with VEMB.
        result1 = self.redis.execute_command('VEMB', self.test_key, f'{self.test_key}:item:1')
        result2 = self.redis.execute_command('VEMB', self.test_key, f'{self.test_key}:item:2')

        retrieved_vec1 = [float(x) for x in result1]
        retrieved_vec2 = [float(x) for x in result2]

        # Compare both vectors with the original (allow for small quantization errors).
        for i in range(dim):
            assert abs(vec[i] - retrieved_vec1[i]) < 0.01, \
                f"FP32 vector component {i} mismatch: expected {vec[i]}, got {retrieved_vec1[i]}"
            assert abs(vec[i] - retrieved_vec2[i]) < 0.01, \
                f"VALUES vector component {i} mismatch: expected {vec[i]}, got {retrieved_vec2[i]}"

        # Test non-existent item.
        result = self.redis.execute_command('VEMB', self.test_key, 'nonexistent')
        assert result is None, "Non-existent item should return nil"
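The 0.01 tolerance used above is needed because vector sets quantize components to 8 bits (Q8) by default. As a sketch of how to avoid the quantization error entirely, VADD's NOQUANT option (redis-py assumed; key and element names illustrative) stores full FP32 values, so VEMB round-trips exactly up to float32 precision:

import redis
import struct

r = redis.Redis()
vec = struct.pack('4f', 0.5, 0.5, 0.5, 0.5)  # Already unit length, so normalization is a no-op.
r.execute_command('VADD', 'vset:exact', 'FP32', vec, 'item:1', 'NOQUANT')
print(r.execute_command('VEMB', 'vset:exact', 'item:1'))  # 0.5, 0.5, 0.5, 0.5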
@ -0,0 +1,47 @@
from test import TestCase, generate_random_vector
import struct

class BasicVISMEMBER(TestCase):
    def getname(self):
        return "VISMEMBER basic functionality"

    def test(self):
        # Add multiple vectors to the vector set.
        vec1 = generate_random_vector(4)
        vec2 = generate_random_vector(4)
        vec_bytes1 = struct.pack('4f', *vec1)
        vec_bytes2 = struct.pack('4f', *vec2)

        # Create item keys.
        item1 = f'{self.test_key}:item:1'
        item2 = f'{self.test_key}:item:2'
        nonexistent_item = f'{self.test_key}:item:nonexistent'

        # Add the vectors.
        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes1, item1)
        self.redis.execute_command('VADD', self.test_key, 'FP32', vec_bytes2, item2)

        # Test VISMEMBER with existing elements.
        result1 = self.redis.execute_command('VISMEMBER', self.test_key, item1)
        assert result1 == 1, f"VISMEMBER should return 1 for existing item, got {result1}"

        result2 = self.redis.execute_command('VISMEMBER', self.test_key, item2)
        assert result2 == 1, f"VISMEMBER should return 1 for existing item, got {result2}"

        # Test VISMEMBER with a non-existent element.
        result3 = self.redis.execute_command('VISMEMBER', self.test_key, nonexistent_item)
        assert result3 == 0, f"VISMEMBER should return 0 for non-existent item, got {result3}"

        # Test VISMEMBER with a non-existent key.
        nonexistent_key = f'{self.test_key}_nonexistent'
        result4 = self.redis.execute_command('VISMEMBER', nonexistent_key, item1)
        assert result4 == 0, f"VISMEMBER should return 0 for non-existent key, got {result4}"

        # Test VISMEMBER after removing an element.
        self.redis.execute_command('VREM', self.test_key, item1)
        result5 = self.redis.execute_command('VISMEMBER', self.test_key, item1)
        assert result5 == 0, f"VISMEMBER should return 0 after element removal, got {result5}"

        # Verify item2 still exists.
        result6 = self.redis.execute_command('VISMEMBER', self.test_key, item2)
        assert result6 == 1, f"VISMEMBER should still return 1 for remaining item, got {result6}"
@ -0,0 +1,55 @@
from test import TestCase, generate_random_vector, fill_redis_with_vectors
import struct

class VRANDMEMBERTest(TestCase):
    def getname(self):
        return "VRANDMEMBER basic functionality"

    def test(self):
        # Test with an empty key.
        result = self.redis.execute_command('VRANDMEMBER', self.test_key)
        assert result is None, "VRANDMEMBER on non-existent key should return NULL"

        result = self.redis.execute_command('VRANDMEMBER', self.test_key, 5)
        assert isinstance(result, list) and len(result) == 0, \
            "VRANDMEMBER with count on non-existent key should return empty array"

        # Fill with vectors.
        dim = 4
        count = 100
        data = fill_redis_with_vectors(self.redis, self.test_key, count, dim)

        # Test single random member.
        result = self.redis.execute_command('VRANDMEMBER', self.test_key)
        assert result is not None, "VRANDMEMBER should return a random member"
        assert result.decode() in data.names, "Random member should be in the set"

        # Test multiple unique members (positive count).
        positive_count = 10
        result = self.redis.execute_command('VRANDMEMBER', self.test_key, positive_count)
        assert isinstance(result, list), "VRANDMEMBER with positive count should return an array"
        assert len(result) == positive_count, f"Should return {positive_count} members"

        # Check for uniqueness.
        decoded_results = [r.decode() for r in result]
        assert len(decoded_results) == len(set(decoded_results)), \
            "Results should be unique with positive count"
        for item in decoded_results:
            assert item in data.names, "All returned items should be in the set"

        # Test asking for more members than the set contains.
        result = self.redis.execute_command('VRANDMEMBER', self.test_key, count + 10)
        assert len(result) == count, \
            "Should return only the available members when asking for more than exist"

        # Test with duplicates allowed (negative count).
        negative_count = -20
        result = self.redis.execute_command('VRANDMEMBER', self.test_key, negative_count)
        assert isinstance(result, list), "VRANDMEMBER with negative count should return an array"
        assert len(result) == abs(negative_count), f"Should return {abs(negative_count)} members"

        # Check that all returned elements are valid.
        decoded_results = [r.decode() for r in result]
        for item in decoded_results:
            assert item in data.names, "All returned items should be in the set"

        # Test with count = 0 (edge case).
        result = self.redis.execute_command('VRANDMEMBER', self.test_key, 0)
        assert isinstance(result, list) and len(result) == 0, \
            "VRANDMEMBER with count=0 should return empty array"
File diff suppressed because it is too large
@ -0,0 +1,539 @@
/*
 * HNSW (Hierarchical Navigable Small World) Implementation
 * Based on the paper by Yu. A. Malkov, D. A. Yashunin
 *
 * Copyright (c) 2009-Present, Redis Ltd.
 * All rights reserved.
 *
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 * Originally authored by: Salvatore Sanfilippo
 */

#define _DEFAULT_SOURCE
#define _USE_MATH_DEFINES
#define _POSIX_C_SOURCE 200809L

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/time.h>
#include <time.h>
#include <stdint.h>
#include <pthread.h>
#include <stdatomic.h>
#include <math.h>

#include "hnsw.h"

/* Get current time in milliseconds */
uint64_t ms_time(void) {
    struct timeval tv;
    gettimeofday(&tv, NULL);
    return (uint64_t)tv.tv_sec * 1000 + (tv.tv_usec / 1000);
}

/* Implementation of the recall test with random vectors. */
void test_recall(HNSW *index, int ef) {
    const int num_test_vectors = 10000;
    const int k = 100; // Number of nearest neighbors to find.
    if (ef < k) ef = k;

    // Add recall distribution counters (2% bins from 0-100%).
    int recall_bins[50] = {0};

    // Create array to store vectors for mixing.
    int num_source_vectors = 1000; // Enough, since we mix them.
    float **source_vectors = malloc(sizeof(float*) * num_source_vectors);
    if (!source_vectors) {
        printf("Failed to allocate memory for source vectors\n");
        return;
    }

    // Allocate memory for each source vector.
    for (int i = 0; i < num_source_vectors; i++) {
        source_vectors[i] = malloc(sizeof(float) * 300);
        if (!source_vectors[i]) {
            printf("Failed to allocate memory for source vector %d\n", i);
            // Clean up already allocated vectors.
            for (int j = 0; j < i; j++) free(source_vectors[j]);
            free(source_vectors);
            return;
        }
    }

    /* Populate source vectors from the index, we just scan the
     * first N items. */
    int source_count = 0;
    hnswNode *current = index->head;
    while (current && source_count < num_source_vectors) {
        hnsw_get_node_vector(index, current, source_vectors[source_count]);
        source_count++;
        current = current->next;
    }

    if (source_count < num_source_vectors) {
        printf("Warning: Only found %d nodes for source vectors\n",
               source_count);
        num_source_vectors = source_count;
    }

    // Allocate memory for the test vector.
    float *test_vector = malloc(sizeof(float) * 300);
    if (!test_vector) {
        printf("Failed to allocate memory for test vector\n");
        for (int i = 0; i < num_source_vectors; i++) {
            free(source_vectors[i]);
        }
        free(source_vectors);
        return;
    }

    // Allocate memory for results.
    hnswNode **hnsw_results = malloc(sizeof(hnswNode*) * ef);
    hnswNode **linear_results = malloc(sizeof(hnswNode*) * ef);
    float *hnsw_distances = malloc(sizeof(float) * ef);
    float *linear_distances = malloc(sizeof(float) * ef);

    if (!hnsw_results || !linear_results || !hnsw_distances || !linear_distances) {
        printf("Failed to allocate memory for results\n");
        if (hnsw_results) free(hnsw_results);
        if (linear_results) free(linear_results);
        if (hnsw_distances) free(hnsw_distances);
        if (linear_distances) free(linear_distances);
        for (int i = 0; i < num_source_vectors; i++) free(source_vectors[i]);
        free(source_vectors);
        free(test_vector);
        return;
    }

    // Initialize random seed.
    srand(time(NULL));

    // Perform recall test.
    printf("\nPerforming recall test with EF=%d on %d random vectors...\n",
           ef, num_test_vectors);
    double total_recall = 0.0;

    for (int t = 0; t < num_test_vectors; t++) {
        // Create a random vector by mixing 3 existing vectors.
        float weights[3] = {0.0};
        int src_indices[3] = {0};

        // Generate random weights.
        float weight_sum = 0.0;
        for (int i = 0; i < 3; i++) {
            weights[i] = (float)rand() / RAND_MAX;
            weight_sum += weights[i];
            src_indices[i] = rand() % num_source_vectors;
        }

        // Normalize weights.
        for (int i = 0; i < 3; i++) weights[i] /= weight_sum;

        // Mix vectors.
        memset(test_vector, 0, sizeof(float) * 300);
        for (int i = 0; i < 3; i++) {
            for (int j = 0; j < 300; j++) {
                test_vector[j] +=
                    weights[i] * source_vectors[src_indices[i]][j];
            }
        }

        // Perform HNSW search with the specified EF parameter.
        int slot = hnsw_acquire_read_slot(index);
        int hnsw_found = hnsw_search(index, test_vector, ef, hnsw_results, hnsw_distances, slot, 0);

        // Perform linear search (ground truth).
        int linear_found = hnsw_ground_truth_with_filter(index, test_vector, ef, linear_results, linear_distances, slot, 0, NULL, NULL);
        hnsw_release_read_slot(index, slot);

        // Calculate recall for this query (intersection size / k).
        if (hnsw_found > k) hnsw_found = k;
        if (linear_found > k) linear_found = k;
        int intersection_count = 0;
        for (int i = 0; i < linear_found; i++) {
            for (int j = 0; j < hnsw_found; j++) {
                if (linear_results[i] == hnsw_results[j]) {
                    intersection_count++;
                    break;
                }
            }
        }

        double recall = (double)intersection_count / linear_found;
        total_recall += recall;

        // Add to distribution bins (2% steps).
        int bin_index = (int)(recall * 50);
        if (bin_index >= 50) bin_index = 49; // Handle the 100% recall case.
        recall_bins[bin_index]++;

        // Show progress.
        if ((t+1) % 1000 == 0 || t == num_test_vectors-1) {
            printf("Processed %d/%d queries, current avg recall: %.2f%%\n",
                   t+1, num_test_vectors, (total_recall / (t+1)) * 100);
        }
    }

    // Calculate and print the final average recall.
    double avg_recall = (total_recall / num_test_vectors) * 100;
    printf("\nRecall Test Results:\n");
    printf("Average recall@%d (EF=%d): %.2f%%\n", k, ef, avg_recall);

    // Print recall distribution histogram.
    printf("\nRecall Distribution (2%% bins):\n");
    printf("================================\n");

    // Find the maximum bin count for scaling.
    int max_count = 0;
    for (int i = 0; i < 50; i++) {
        if (recall_bins[i] > max_count) max_count = recall_bins[i];
    }

    // Scale factor for the histogram (max 50 chars wide).
    const int max_bars = 50;
    double scale = (max_count > max_bars) ? (double)max_bars / max_count : 1.0;

    // Print the histogram.
    for (int i = 0; i < 50; i++) {
        int bar_len = (int)(recall_bins[i] * scale);
        printf("%3d%%-%-3d%% | %-6d |", i*2, (i+1)*2, recall_bins[i]);
        for (int j = 0; j < bar_len; j++) printf("#");
        printf("\n");
    }

    // Cleanup.
    free(hnsw_results);
    free(linear_results);
    free(hnsw_distances);
    free(linear_distances);
    free(test_vector);
    for (int i = 0; i < num_source_vectors; i++) free(source_vectors[i]);
    free(source_vectors);
}

/* Example usage in main() */
int w2v_single_thread(int m_param, int quantization, uint64_t numele, int massdel, int self_recall, int recall_ef) {
    /* Create index */
    HNSW *index = hnsw_new(300, quantization, m_param);
    float v[300];
    uint16_t wlen;

    FILE *fp = fopen("word2vec.bin","rb");
    if (fp == NULL) {
        perror("word2vec.bin file missing");
        exit(1);
    }
    unsigned char header[8];
    if (fread(header,8,1,fp) <= 0) { // Skip header
        perror("Unexpected EOF");
        exit(1);
    }

    uint64_t id = 0;
    uint64_t start_time = ms_time();
    char *word = NULL;
    hnswNode *search_node = NULL;

    while(id < numele) {
        if (fread(&wlen,2,1,fp) == 0) break;
        word = malloc(wlen+1);
        if (fread(word,wlen,1,fp) <= 0) {
            perror("unexpected EOF");
            exit(1);
        }
        word[wlen] = 0;
        if (fread(v,300*sizeof(float),1,fp) <= 0) {
            perror("unexpected EOF");
            exit(1);
        }

        // Plain API that acquires a write lock for the whole time.
        hnswNode *added = hnsw_insert(index, v, NULL, 0, id++, word, 200);

        if (!strcmp(word,"banana")) search_node = added;
        if (!(id % 10000)) printf("%llu added\n", (unsigned long long)id);
    }
    uint64_t elapsed = ms_time() - start_time;
    fclose(fp);

    printf("%llu words added (%llu words/sec), last word: %s\n",
           (unsigned long long)index->node_count,
           (unsigned long long)id*1000/elapsed, word);

    /* Search query */
    if (search_node == NULL) search_node = index->head;
    hnsw_get_node_vector(index,search_node,v);
    hnswNode *neighbors[10];
    float distances[10];

    int found, j;
    start_time = ms_time();
    for (j = 0; j < 20000; j++)
        found = hnsw_search(index, v, 10, neighbors, distances, 0, 0);
    elapsed = ms_time() - start_time;
    printf("%d searches performed (%llu searches/sec), nodes found: %d\n",
           j, (unsigned long long)j*1000/elapsed, found);

    if (found > 0) {
        printf("Found %d neighbors:\n", found);
        for (int i = 0; i < found; i++) {
            printf("Node ID: %llu, distance: %f, word: %s\n",
                   (unsigned long long)neighbors[i]->id,
                   distances[i], (char*)neighbors[i]->value);
        }
    }

    // Self-recall test (ability to find the node by its own vector).
    if (self_recall) {
        hnsw_print_stats(index);
        hnsw_test_graph_recall(index,200,0);
    }

    // Recall test with random vectors.
    if (recall_ef > 0) {
        test_recall(index, recall_ef);
    }

    uint64_t connected_nodes;
    int reciprocal_links;
    hnsw_validate_graph(index, &connected_nodes, &reciprocal_links);

    if (massdel) {
        int remove_perc = 95;
        printf("\nRemoving %d%% of nodes...\n", remove_perc);
        uint64_t initial_nodes = index->node_count;

        hnswNode *current = index->head;
        while (current && index->node_count > initial_nodes*(100-remove_perc)/100) {
            hnswNode *next = current->next;
            hnsw_delete_node(index,current,free);
            current = next;
            // To avoid removing only contiguous nodes, skip a node
            // from time to time.
            if (current && !(random() % remove_perc)) current = current->next;
        }
        printf("%llu nodes left\n", (unsigned long long)index->node_count);

        // Test again.
        hnsw_validate_graph(index, &connected_nodes, &reciprocal_links);
        hnsw_test_graph_recall(index,200,0);
    }

    hnsw_free(index,free);
    return 0;
}

struct threadContext {
    pthread_mutex_t FileAccessMutex;
    uint64_t numele;
    _Atomic uint64_t SearchesDone;
    _Atomic uint64_t id;
    FILE *fp;
    HNSW *index;
    float *search_vector;
};

// Note that in practical terms inserting with many concurrent threads
// may be *slower* and not faster, because there is a lot of
// contention. So this is more a robustness test than anything else.
//
// The optimistic commit API goal is actually to exploit the ability to
// add faster when there are many concurrent reads.
void *threaded_insert(void *ctxptr) {
    struct threadContext *ctx = ctxptr;
    char *word;
    float v[300];
    uint16_t wlen;

    while(1) {
        /* Read the whole record (length, word, vector) under the mutex,
         * so that concurrent threads can't interleave partial reads. */
        pthread_mutex_lock(&ctx->FileAccessMutex);
        if (fread(&wlen,2,1,ctx->fp) == 0) {
            pthread_mutex_unlock(&ctx->FileAccessMutex);
            break;
        }
        word = malloc(wlen+1);
        if (fread(word,wlen,1,ctx->fp) <= 0) {
            perror("Unexpected EOF");
            exit(1);
        }
        word[wlen] = 0;
        if (fread(v,300*sizeof(float),1,ctx->fp) <= 0) {
            perror("Unexpected EOF");
            exit(1);
        }
        pthread_mutex_unlock(&ctx->FileAccessMutex);

        // Check-and-set API that performs the costly scan for similar
        // nodes concurrently with other read threads, and finally
        // applies the check if the graph wasn't modified.
        InsertContext *ic;
        uint64_t next_id = ctx->id++;
        ic = hnsw_prepare_insert(ctx->index, v, NULL, 0, next_id, 200);
        if (hnsw_try_commit_insert(ctx->index, ic, word) == NULL) {
            // This time try locking since the start.
            hnsw_insert(ctx->index, v, NULL, 0, next_id, word, 200);
        }

        if (next_id >= ctx->numele) break;
        if (!((next_id+1) % 10000))
            printf("%llu added\n", (unsigned long long)next_id+1);
    }
    return NULL;
}

void *threaded_search(void *ctxptr) {
    struct threadContext *ctx = ctxptr;

    /* Search query */
    hnswNode *neighbors[10];
    float distances[10];
    int found = 0;
    uint64_t last_id = 0;

    while(ctx->id < 1000000) {
        int slot = hnsw_acquire_read_slot(ctx->index);
        found = hnsw_search(ctx->index, ctx->search_vector, 10, neighbors, distances, slot, 0);
        hnsw_release_read_slot(ctx->index,slot);
        last_id = ++ctx->id;
    }

    if (found > 0 && last_id == 1000000) {
        printf("Found %d neighbors:\n", found);
        for (int i = 0; i < found; i++) {
            printf("Node ID: %llu, distance: %f, word: %s\n",
                   (unsigned long long)neighbors[i]->id,
                   distances[i], (char*)neighbors[i]->value);
        }
    }
    return NULL;
}

int w2v_multi_thread(int m_param, int numthreads, int quantization, uint64_t numele) {
    /* Create index */
    struct threadContext ctx;

    ctx.index = hnsw_new(300, quantization, m_param);

    ctx.fp = fopen("word2vec.bin","rb");
    if (ctx.fp == NULL) {
        perror("word2vec.bin file missing");
        exit(1);
    }

    unsigned char header[8];
    if (fread(header,8,1,ctx.fp) <= 0) { // Skip header
        perror("Unexpected EOF");
        exit(1);
    }
    pthread_mutex_init(&ctx.FileAccessMutex,NULL);

    uint64_t start_time = ms_time();
    ctx.id = 0;
    ctx.numele = numele;
    pthread_t threads[numthreads];
    for (int j = 0; j < numthreads; j++)
        pthread_create(&threads[j], NULL, threaded_insert, &ctx);

    // Wait for all the threads to terminate adding items.
    for (int j = 0; j < numthreads; j++)
        pthread_join(threads[j],NULL);

    uint64_t elapsed = ms_time() - start_time;
    fclose(ctx.fp);

    // Obtain the last word.
    hnswNode *node = ctx.index->head;
    char *word = node->value;

    // We will search this last inserted word in the next test.
    // Let's save its embedding.
    ctx.search_vector = malloc(sizeof(float)*300);
    hnsw_get_node_vector(ctx.index,node,ctx.search_vector);

    printf("%llu words added (%llu words/sec), last word: %s\n",
           (unsigned long long)ctx.index->node_count,
           (unsigned long long)ctx.id*1000/elapsed, word);

    /* Search query */
    start_time = ms_time();
    ctx.id = 0; // We will use this atomic field to stop at N queries done.

    for (int j = 0; j < numthreads; j++)
        pthread_create(&threads[j], NULL, threaded_search, &ctx);

    // Wait for all the threads to terminate searching.
    for (int j = 0; j < numthreads; j++)
        pthread_join(threads[j],NULL);

    elapsed = ms_time() - start_time;
    printf("%llu searches performed (%llu searches/sec)\n",
           (unsigned long long)ctx.id,
           (unsigned long long)ctx.id*1000/elapsed);

    hnsw_print_stats(ctx.index);
    uint64_t connected_nodes;
    int reciprocal_links;
    hnsw_validate_graph(ctx.index, &connected_nodes, &reciprocal_links);
    printf("%llu connected nodes. Links all reciprocal: %d\n",
           (unsigned long long)connected_nodes, reciprocal_links);
    hnsw_free(ctx.index,free);
    return 0;
}

int main(int argc, char **argv) {
    int quantization = HNSW_QUANT_NONE;
    int numthreads = 0;
    uint64_t numele = 20000;
    int m_param = 0; // Default value (0 means use HNSW_DEFAULT_M).

    /* These you can enable in single thread mode for testing: */
    int massdel = 0;     // If true, runs the mass deletion test.
    int self_recall = 0; // If true, runs the self-recall test.
    int recall_ef = 0;   // If not 0, runs the recall test with this EF value.

    for (int j = 1; j < argc; j++) {
        int moreargs = argc-j-1;

        if (!strcasecmp(argv[j],"--quant")) {
            quantization = HNSW_QUANT_Q8;
        } else if (!strcasecmp(argv[j],"--bin")) {
            quantization = HNSW_QUANT_BIN;
        } else if (!strcasecmp(argv[j],"--mass-del")) {
            massdel = 1;
        } else if (!strcasecmp(argv[j],"--self-recall")) {
            self_recall = 1;
        } else if (moreargs >= 1 && !strcasecmp(argv[j],"--recall")) {
            recall_ef = atoi(argv[j+1]);
            j++;
        } else if (moreargs >= 1 && !strcasecmp(argv[j],"--threads")) {
            numthreads = atoi(argv[j+1]);
            j++;
        } else if (moreargs >= 1 && !strcasecmp(argv[j],"--numele")) {
            numele = strtoll(argv[j+1],NULL,0);
            j++;
            if (numele < 1) numele = 1;
        } else if (moreargs >= 1 && !strcasecmp(argv[j],"--m")) {
            m_param = atoi(argv[j+1]);
            j++;
        } else if (!strcasecmp(argv[j],"--help")) {
            printf("%s [--quant] [--bin] [--threads <count>] [--numele <count>] [--m <count>] [--mass-del] [--self-recall] [--recall <ef>]\n", argv[0]);
            exit(0);
        } else {
            printf("Unrecognized option or wrong number of arguments: %s\n", argv[j]);
            exit(1);
        }
    }

    if (quantization == HNSW_QUANT_NONE) {
        printf("You can enable quantization with --quant\n");
    }

    if (numthreads > 0) {
        w2v_multi_thread(m_param, numthreads, quantization, numele);
    } else {
        printf("Single thread execution. Use --threads 4 for concurrent API\n");
        w2v_single_thread(m_param, quantization, numele, massdel, self_recall, recall_ef);
    }
}
@ -0,0 +1,376 @@
include redis.conf

loadmodule ./modules/redisbloom/redisbloom.so
loadmodule ./modules/redisearch/redisearch.so
loadmodule ./modules/redisjson/rejson.so
loadmodule ./modules/redistimeseries/redistimeseries.so

############################## QUERY ENGINE CONFIG ############################

# Keep numeric ranges in numeric tree parent nodes of leafs for `x` generations.
# numeric, valid range: [0, 2], default: 0
#
# search-_numeric-ranges-parents 0

# The number of iterations to run while performing background indexing
# before we call usleep(1) (sleep for 1 micro-second) and make sure that we
# allow redis to process other commands.
# numeric, valid range: [1, UINT32_MAX], default: 100
#
# search-bg-index-sleep-gap 100

# The default dialect used in search queries.
# numeric, valid range: [1, 4], default: 1
#
# search-default-dialect 1

# The fork GC will only start to clean when the number of uncleaned documents
# exceeds this threshold.
# numeric, valid range: [1, LLONG_MAX], default: 100
#
# search-fork-gc-clean-threshold 100

# Interval (in seconds) in which to retry running the fork GC after failure.
# numeric, valid range: [1, LLONG_MAX], default: 5
#
# search-fork-gc-retry-interval 5

# Interval (in seconds) in which to run the fork GC (relevant only when fork
# GC is used).
# numeric, valid range: [1, LLONG_MAX], default: 30
#
# search-fork-gc-run-interval 30

# The number of seconds for the fork GC to sleep before exiting.
# numeric, valid range: [0, LLONG_MAX], default: 0
#
# search-fork-gc-sleep-before-exit 0

# Scan this many documents at a time during every GC iteration.
# numeric, valid range: [1, LLONG_MAX], default: 100
#
# search-gc-scan-size 100

# Max number of cursors for a given index that can be opened inside of a shard.
# numeric, valid range: [0, LLONG_MAX], default: 128
#
# search-index-cursor-limit 128

# Maximum number of results from ft.aggregate command.
# numeric, valid range: [0, (1ULL << 31)], default: 1ULL << 31
#
# search-max-aggregate-results 2147483648

# Maximum prefix expansions to be used in a query.
# numeric, valid range: [1, LLONG_MAX], default: 200
#
# search-max-prefix-expansions 200

# Maximum runtime document table size (for this process).
# numeric, valid range: [1, 100000000], default: 1000000
#
# search-max-doctablesize 1000000

# Max idle time allowed to be set for a cursor. Setting it high might cause
# high memory consumption.
# numeric, valid range: [1, LLONG_MAX], default: 300000
#
# search-cursor-max-idle 300000

# Maximum number of results from ft.search command.
# numeric, valid range: [0, 1ULL << 31], default: 1000000
#
# search-max-search-results 1000000

# Number of worker threads to use for background tasks when the server is
# in an operation event.
# numeric, valid range: [1, 16], default: 4
#
# search-min-operation-workers 4

# Minimum length of term to be considered for phonetic matching.
# numeric, valid range: [1, LLONG_MAX], default: 3
#
# search-min-phonetic-term-len 3

# The minimum prefix for expansions (`*`).
# numeric, valid range: [1, LLONG_MAX], default: 2
#
# search-min-prefix 2

# The minimum word length to stem.
# numeric, valid range: [2, UINT32_MAX], default: 4
#
# search-min-stem-len 4

# Delta used to increase positional offsets between array
# slots for multi text values.
# Can control the level of separation between phrases in different
# array slots (related to the SLOP parameter of ft.search command).
# numeric, valid range: [1, UINT32_MAX], default: 100
#
# search-multi-text-slop 100

# Used for setting the buffer limit threshold for vector similarity tiered
# HNSW index, so that if we are using WORKERS for indexing, and the
# number of vectors waiting in the buffer to be indexed exceeds this limit,
# we insert new vectors directly into HNSW.
# numeric, valid range: [0, LLONG_MAX], default: 1024
#
# search-tiered-hnsw-buffer-limit 1024

# Query timeout.
# numeric, valid range: [1, LLONG_MAX], default: 500
#
# search-timeout 500

# Minimum number of iterators in a union from which the iterator will
# switch to a heap-based implementation.
# numeric, valid range: [1, LLONG_MAX], default: 20
#
# search-union-iterator-heap 20

# The maximum memory resize for vector similarity indexes (in bytes).
# numeric, valid range: [0, UINT32_MAX], default: 0
#
# search-vss-max-resize 0

# Number of worker threads to use for query processing and background tasks.
# numeric, valid range: [0, 16], default: 0
# This configuration also affects the number of connections per shard.
#
# search-workers 0

# The number of high priority tasks to be executed at any given time by the
# worker thread pool, before executing low priority tasks. After this number
# of high priority tasks are being executed, the worker thread pool will
# execute high and low priority tasks alternately.
# numeric, valid range: [0, LLONG_MAX], default: 1
#
# search-workers-priority-bias-threshold 1

# Load extension scoring/expansion module. Immutable.
# string, default: ""
#
# search-ext-load ""

# Path to Chinese dictionary configuration file (for Chinese tokenization). Immutable.
# string, default: ""
#
# search-friso-ini ""

# Action to perform when search timeout is exceeded (choose RETURN or FAIL).
# enum, valid values: ["return", "fail"], default: "fail"
#
# search-on-timeout fail

# Determine whether some index resources are freed on a second thread.
# bool, default: yes
#
# search-_free-resource-on-thread yes

# Enable legacy compression of double to float.
# bool, default: no
#
# search-_numeric-compress no

# Disable print of time for ft.profile. For testing only.
# bool, default: yes
#
# search-_print-profile-clock yes

# The intersection iterator orders the children iterators by their relative
# estimated number of results in ascending order, so that if we see first
# iterators with a lower count of results we will skip a larger number of
# results, which translates into faster iteration. If this flag is set, we use
# this optimization in a way where union iterators are factored by the number
# of their own children, so that we sort by the number of children times the
# overall estimated number of results instead.
# bool, default: no
#
# search-_prioritize-intersect-union-children no

# Set to run without memory pools.
# bool, default: no
#
# search-no-mem-pools no

# Disable garbage collection (for this process).
# bool, default: no
#
# search-no-gc no

# Enable commands filter which optimizes indexing on partial hash updates.
# bool, default: no
#
# search-partial-indexed-docs no

# Disable compression for DocID inverted index. Boosts CPU performance.
# bool, default: no
#
# search-raw-docid-encoding no

# Number of search threads in the coordinator thread pool.
# numeric, valid range: [1, LLONG_MAX], default: 20
#
# search-threads 20

# Timeout for topology validation (in milliseconds). After this timeout,
# any pending requests will be processed, even if the topology is not fully connected.
# numeric, valid range: [0, LLONG_MAX], default: 30000
#
# search-topology-validation-timeout 30000


############################## TIME SERIES CONFIG #############################

# The maximal number of per-shard threads for cross-key queries when using cluster mode
# (TS.MRANGE, TS.MREVRANGE, TS.MGET, and TS.QUERYINDEX).
# Note: increasing this value may either increase or decrease the performance.
# integer, valid range: [1..16], default: 3
# This is a load-time configuration parameter.
#
# ts-num-threads 3


# Default compaction rules for newly created keys with TS.ADD, TS.INCRBY, and TS.DECRBY.
# Has no effect on keys created with TS.CREATE.
# This default value is applied to each new time series upon its creation.
# string, see documentation for rules format, default: no compaction rules
#
# ts-compaction-policy ""

# Default chunk encoding for automatically-created compacted time series.
# This default value is applied to each new compacted time series automatically
# created when ts-compaction-policy is specified.
# valid values: COMPRESSED, UNCOMPRESSED, default: COMPRESSED
#
# ts-encoding COMPRESSED


# Default retention period, in milliseconds. 0 means no expiration.
# This default value is applied to each new time series upon its creation.
# If ts-compaction-policy is specified - it is overridden for created
# compactions as specified in ts-compaction-policy.
# integer, valid range: [0 .. LLONG_MAX], default: 0
#
# ts-retention-policy 0

# Default policy for handling insertion (TS.ADD and TS.MADD) of multiple
# samples with identical timestamps.
# This default value is applied to each new time series upon its creation.
# string, valid values: BLOCK, FIRST, LAST, MIN, MAX, SUM, default: BLOCK
#
# ts-duplicate-policy BLOCK

# Default initial allocation size, in bytes, for the data part of each new chunk.
# This default value is applied to each new time series upon its creation.
# integer, valid range: [48 .. 1048576]; must be a multiple of 8, default: 4096
#
# ts-chunk-size-bytes 4096

# Default values for newly created time series.
# Many sensors report data periodically. Often, the difference between the measured
# value and the previous measured value is negligible and related to random noise
# or to measurement accuracy limitations. In such situations it may be preferable
# not to add the new measurement to the time series.
# A new sample is considered a duplicate and is ignored if the following conditions are met:
# - The time series is not a compaction;
# - The time series' DUPLICATE_POLICY is LAST;
# - The sample is added in-order (timestamp >= max_timestamp);
# - The difference of the current timestamp from the previous timestamp
#   (timestamp - max_timestamp) is less than or equal to ts-ignore-max-time-diff;
# - The absolute difference of the current value from the value at the previous maximum timestamp
#   (abs(value - value_at_max_timestamp)) is less than or equal to ts-ignore-max-val-diff;
# where max_timestamp is the timestamp of the sample with the largest timestamp in the time series,
# and value_at_max_timestamp is the value at max_timestamp.
# ts-ignore-max-time-diff: integer, valid range: [0 .. LLONG_MAX], default: 0
# ts-ignore-max-val-diff: double, valid range: [0 .. DBL_MAX], default: 0
#
# ts-ignore-max-time-diff 0
# ts-ignore-max-val-diff 0
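To make the conditions above concrete, here is a sketch of the per-key equivalent using the IGNORE option that recent RedisTimeSeries versions document for TS.CREATE (hypothetical key and values, redis-py assumed):

import redis

r = redis.Redis()
# LAST duplicate policy plus ignore thresholds: 10 ms and 0.5.
r.execute_command('TS.CREATE', 'sensor:temp', 'DUPLICATE_POLICY', 'LAST',
                  'IGNORE', 10, 0.5)
r.execute_command('TS.ADD', 'sensor:temp', 1000, 20.0)
# In-order, 5 ms and only 0.1 away from the last sample: treated as a
# duplicate and ignored, per the rules described above.
r.execute_command('TS.ADD', 'sensor:temp', 1005, 20.1)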


########################### BLOOM FILTERS CONFIG ##############################

# Default values for new Bloom filters created with BF.ADD, BF.MADD, BF.INSERT, and BF.RESERVE.
# These defaults are applied to each new Bloom filter upon its creation.

# Error ratio
# The desired probability for false positives.
# For a false positive rate of 0.1% (1 in 1000) - the value should be 0.001.
# double, valid range: (0 .. 1), value greater than 0.25 is treated as 0.25, default: 0.01
#
# bf-error-rate 0.01

# Initial capacity
# The number of entries intended to be added to the filter.
# integer, valid range: [1 .. 1GB], default: 100
#
# bf-initial-size 100

# Expansion factor
# When capacity is reached, an additional sub-filter is created.
# The size of the new sub-filter is the size of the last sub-filter multiplied
# by expansion.
# integer, [0 .. 32768]. 0 is equivalent to NONSCALING. default: 2
#
# bf-expansion-factor 2
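These defaults can also be overridden per filter at creation time with BF.RESERVE; a minimal sketch (redis-py assumed, key name illustrative):

import redis

r = redis.Redis()
# 0.1% false-positive rate, capacity for 10000 items, 4x sub-filter growth.
r.execute_command('BF.RESERVE', 'bf:users', 0.001, 10000, 'EXPANSION', 4)
r.execute_command('BF.ADD', 'bf:users', 'user:1001')
print(r.execute_command('BF.EXISTS', 'bf:users', 'user:1001'))  # 1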


########################### CUCKOO FILTERS CONFIG #############################

# Default values for new Cuckoo filters created with
# CF.ADD, CF.ADDNX, CF.INSERT, CF.INSERTNX, and CF.RESERVE.
# These defaults are applied to each new Cuckoo filter upon its creation.

# Initial capacity
# A filter will likely not fill up to 100% of its capacity.
# Make sure to reserve extra capacity if you want to avoid expansions.
# The value is rounded to the next 2^n integer.
# integer, valid range: [2*cf-bucket-size .. 1GB], default: 1024
#
# cf-initial-size 1024

# Number of items in each bucket
# The minimal false positive rate is 2/255 ~ 0.78% when a bucket size of 1 is used.
# Larger buckets increase the error rate linearly, but improve the fill rate.
# integer, valid range: [1 .. 255], default: 2
#
# cf-bucket-size 2

# Maximum iterations
# Number of attempts to swap items between buckets before declaring the filter
# full and creating an additional filter.
# A lower value improves performance. A higher value improves fill rate.
# integer, valid range: [1 .. 65535], default: 20
#
# cf-max-iterations 20

# Expansion factor
# When a new filter is created, its size is the size of the current filter
# multiplied by this factor.
# integer, valid range: [0 .. 32768], 0 is equivalent to NONSCALING, default: 1
#
# cf-expansion-factor 1

# Maximum expansions
# integer, valid range: [1 .. 65536], default: 32
#
# cf-max-expansions 32
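As with Bloom filters, CF.RESERVE overrides these defaults for a single cuckoo filter; a sketch (redis-py assumed, key name illustrative):

import redis

r = redis.Redis()
# Capacity is rounded up to a power of two; 4 items per bucket, more swap attempts.
r.execute_command('CF.RESERVE', 'cf:seen', 65536, 'BUCKETSIZE', 4, 'MAXITERATIONS', 50)
r.execute_command('CF.ADD', 'cf:seen', 'item:1')
print(r.execute_command('CF.COUNT', 'cf:seen', 'item:1'))  # 1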


################################## SECURITY ###################################
#
# The following is a list of command categories and their meanings:
#
# * search - Query engine related.
# * json - Data type: JSON related.
# * timeseries - Data type: time series related.
# * bloom - Data type: Bloom filter related.
# * cuckoo - Data type: cuckoo filter related.
# * topk - Data type: top-k related.
# * cms - Data type: count-min sketch related.
# * tdigest - Data type: t-digest related.
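These categories plug into the standard ACL mechanism; for example, a user can be restricted to JSON and query engine commands (sketch, redis-py assumed; user name and password illustrative):

import redis

r = redis.Redis()
r.execute_command('ACL', 'SETUSER', 'appuser', 'on', '>secret',
                  '~app:*', '+@json', '+@search')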
src/Makefile
@ -2,8 +2,9 @@
# Copyright (c) 2011-Present, Redis Ltd.
# All rights reserved.
#
# Licensed under your choice of the Redis Source Available License 2.0
# (RSALv2) or the Server Side Public License v1 (SSPLv1).
# Licensed under your choice of (a) the Redis Source Available License 2.0
# (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
# GNU Affero General Public License v3 (AGPLv3).
#
# The Makefile composes the final FINAL_CFLAGS and FINAL_LDFLAGS using
# what is needed for Redis plus the standard CFLAGS and LDFLAGS passed.

@ -52,6 +53,7 @@ endif
WARN=-Wall -W -Wno-missing-field-initializers -Werror=deprecated-declarations -Wstrict-prototypes
OPT=$(OPTIMIZATION)

SKIP_VEC_SETS?=no
# Detect if the compiler supports C11 _Atomic.
# NUMBER_SIGN_CHAR is a workaround to support both GNU Make 4.3 and older versions.
NUMBER_SIGN_CHAR := \#

@ -61,6 +63,7 @@ C11_ATOMIC := $(shell sh -c 'echo "$(NUMBER_SIGN_CHAR)include <stdatomic.h>" > f
ifeq ($(C11_ATOMIC),yes)
	STD+=-std=gnu11
else
	SKIP_VEC_SETS=yes
	STD+=-std=c99
endif

@ -115,12 +118,24 @@ else
ifeq ($(SANITIZER),thread)
    CFLAGS+=-fsanitize=thread -fno-sanitize-recover=all -fno-omit-frame-pointer
    LDFLAGS+=-fsanitize=thread
else
ifeq ($(SANITIZER),memory)
ifeq (clang, $(CLANG))
    export CXX:=clang
    export LD:=clang
    MALLOC=libc # MSan provides its own allocator so make sure not to use jemalloc as they clash
    CFLAGS+=-fsanitize=memory -fsanitize-memory-track-origins=2 -fno-sanitize-recover=all -fno-omit-frame-pointer
    LDFLAGS+=-fsanitize=memory
else
    $(error "MemorySanitizer needs to be compiled and linked with clang. Please use CC=clang")
endif
else
    $(error "unknown sanitizer=${SANITIZER}")
endif
endif
endif
endif
endif

# Override default settings if possible
-include .make-settings

@ -315,6 +330,12 @@ ifeq ($(BUILD_TLS),module)
TLS_MODULE_CFLAGS+=-DUSE_OPENSSL=$(BUILD_MODULE) $(OPENSSL_CFLAGS) -DBUILD_TLS_MODULE=$(BUILD_MODULE)
endif

ifneq ($(SKIP_VEC_SETS),yes)
vpath %.c ../modules/vector-sets
REDIS_VEC_SETS_OBJ=hnsw.o vset.o
FINAL_CFLAGS+=-DINCLUDE_VEC_SETS=1
endif

ifndef V
define MAKE_INSTALL
	@printf '    %b %b\n' $(LINKCOLOR)INSTALL$(ENDCOLOR) $(BINCOLOR)$(1)$(ENDCOLOR) 1>&2

@ -361,7 +382,7 @@ REDIS_BENCHMARK_NAME=redis-benchmark$(PROG_SUFFIX)
REDIS_BENCHMARK_OBJ=ae.o anet.o redis-benchmark.o adlist.o dict.o zmalloc.o redisassert.o release.o crcspeed.o crccombine.o crc64.o siphash.o crc16.o monotonic.o cli_common.o mt19937-64.o strl.o
REDIS_CHECK_RDB_NAME=redis-check-rdb$(PROG_SUFFIX)
REDIS_CHECK_AOF_NAME=redis-check-aof$(PROG_SUFFIX)
ALL_SOURCES=$(sort $(patsubst %.o,%.c,$(REDIS_SERVER_OBJ) $(REDIS_CLI_OBJ) $(REDIS_BENCHMARK_OBJ)))
ALL_SOURCES=$(sort $(patsubst %.o,%.c,$(REDIS_SERVER_OBJ) $(REDIS_VEC_SETS_OBJ) $(REDIS_CLI_OBJ) $(REDIS_BENCHMARK_OBJ)))

all: $(REDIS_SERVER_NAME) $(REDIS_SENTINEL_NAME) $(REDIS_CLI_NAME) $(REDIS_BENCHMARK_NAME) $(REDIS_CHECK_RDB_NAME) $(REDIS_CHECK_AOF_NAME) $(TLS_MODULE)
	@echo ""

@ -408,7 +429,7 @@ ifneq ($(strip $(PREV_FINAL_LDFLAGS)), $(strip $(FINAL_LDFLAGS)))
endif

# redis-server
$(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ)
$(REDIS_SERVER_NAME): $(REDIS_SERVER_OBJ) $(REDIS_VEC_SETS_OBJ)
	$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/lua/src/liblua.a ../deps/hdr_histogram/libhdrhistogram.a ../deps/fpconv/libfpconv.a ../deps/fast_float/libfast_float.a $(FINAL_LIBS)

# redis-sentinel

@ -435,7 +456,7 @@ $(REDIS_CLI_NAME): $(REDIS_CLI_OBJ)
$(REDIS_BENCHMARK_NAME): $(REDIS_BENCHMARK_OBJ)
	$(REDIS_LD) -o $@ $^ ../deps/hiredis/libhiredis.a ../deps/hdr_histogram/libhdrhistogram.a $(FINAL_LIBS) $(TLS_CLIENT_LIBS)

DEP = $(REDIS_SERVER_OBJ:%.o=%.d) $(REDIS_CLI_OBJ:%.o=%.d) $(REDIS_BENCHMARK_OBJ:%.o=%.d)
DEP = $(REDIS_SERVER_OBJ:%.o=%.d) $(REDIS_VEC_SETS_OBJ:%.o=%.d) $(REDIS_CLI_OBJ:%.o=%.d) $(REDIS_BENCHMARK_OBJ:%.o=%.d)
-include $(DEP)

# Because the jemalloc.h header is generated as a part of the jemalloc build,

@ -502,7 +523,7 @@ bench: $(REDIS_BENCHMARK_NAME)
	@echo ""
	@echo "WARNING: if it fails under Linux you probably need to install libc6-dev-i386"
	@echo ""
	$(MAKE) CFLAGS="-m32" LDFLAGS="-m32"
	$(MAKE) CFLAGS="-m32" LDFLAGS="-m32" SKIP_VEC_SETS="yes"

gcov:
	$(MAKE) REDIS_CFLAGS="-fprofile-arcs -ftest-coverage -DCOVERAGE_TEST" REDIS_LDFLAGS="-fprofile-arcs -ftest-coverage"
@ -2,8 +2,9 @@
 * Copyright (c) 2018-Present, Redis Ltd.
 * All rights reserved.
 *
 * Licensed under your choice of the Redis Source Available License 2.0
 * (RSALv2) or the Server Side Public License v1 (SSPLv1).
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 */

#include "server.h"

@ -3,8 +3,9 @@
 * Copyright (c) 2006-Present, Redis Ltd.
 * All rights reserved.
 *
 * Licensed under your choice of the Redis Source Available License 2.0
 * (RSALv2) or the Server Side Public License v1 (SSPLv1).
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 */

@ -3,8 +3,9 @@
 * Copyright (c) 2006-Present, Redis Ltd.
 * All rights reserved.
 *
 * Licensed under your choice of the Redis Source Available License 2.0
 * (RSALv2) or the Server Side Public License v1 (SSPLv1).
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 */

#ifndef __ADLIST_H__
src/ae.c
@ -5,8 +5,9 @@
 * Copyright (c) 2006-Present, Redis Ltd.
 * All rights reserved.
 *
 * Licensed under your choice of the Redis Source Available License 2.0
 * (RSALv2) or the Server Side Public License v1 (SSPLv1).
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 */

#include "ae.h"
src/ae.h
@ -5,8 +5,9 @@
 * Copyright (c) 2006-Present, Redis Ltd.
 * All rights reserved.
 *
 * Licensed under your choice of the Redis Source Available License 2.0
 * (RSALv2) or the Server Side Public License v1 (SSPLv1).
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 */

#ifndef __AE_H__
@ -3,8 +3,9 @@
 * Copyright (c) 2009-Present, Redis Ltd.
 * All rights reserved.
 *
 * Licensed under your choice of the Redis Source Available License 2.0
 * (RSALv2) or the Server Side Public License v1 (SSPLv1).
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 */

@ -3,8 +3,9 @@
 * Copyright (c) 2009-Present, Redis Ltd.
 * All rights reserved.
 *
 * Licensed under your choice of the Redis Source Available License 2.0
 * (RSALv2) or the Server Side Public License v1 (SSPLv1).
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 */

@ -3,8 +3,9 @@
 * Copyright (c) 2006-Present, Redis Ltd.
 * All rights reserved.
 *
 * Licensed under your choice of the Redis Source Available License 2.0
 * (RSALv2) or the Server Side Public License v1 (SSPLv1).
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 */

#include "fmacros.h"

@ -3,8 +3,9 @@
 * Copyright (c) 2006-Present, Redis Ltd.
 * All rights reserved.
 *
 * Licensed under your choice of the Redis Source Available License 2.0
 * (RSALv2) or the Server Side Public License v1 (SSPLv1).
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 */

#ifndef ANET_H
src/aof.c
@ -2,8 +2,9 @@
 * Copyright (c) 2009-Present, Redis Ltd.
 * All rights reserved.
 *
 * Licensed under your choice of the Redis Source Available License 2.0
 * (RSALv2) or the Server Side Public License v1 (SSPLv1).
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 */

#include "server.h"

@ -1724,7 +1725,10 @@ cleanup:
    if (fakeClient) freeClient(fakeClient);
    server.current_client = old_cur_client;
    server.executing_client = old_exec_client;
    int fd = dup(fileno(fp));
    fclose(fp);
    /* Reclaim page cache memory used by the AOF file in background. */
    if (fd >= 0) bioCreateCloseJob(fd, 0, 1);
    sdsfree(aof_filepath);
    return ret;
}

@ -2370,16 +2374,18 @@ int rewriteAppendOnlyFileRio(rio *aof) {
        kvs_it = kvstoreIteratorInit(db->keys);
        /* Iterate this DB writing every entry */
        while((de = kvstoreIteratorNext(kvs_it)) != NULL) {
            sds keystr;
            robj key, *o;
            long long expiretime;
            size_t aof_bytes_before_key = aof->processed_bytes;

            keystr = dictGetKey(de);
            o = dictGetVal(de);
            initStaticStringObject(key,keystr);

            expiretime = getExpire(db,&key);
            /* Get the value object (of type kvobj) */
            kvobj *o = dictGetKV(de);

            /* Get the expire time */
            expiretime = kvobjGetExpire(o);

            /* Set on stack string object for key */
            robj key;
            initStaticStringObject(key, kvobjGetKey(o));

            /* Save the key and associated value */
            if (o->type == OBJ_STRING) {
@ -2,14 +2,15 @@
 * Copyright (c) 2009-Present, Redis Ltd.
 * All rights reserved.
 *
 * Licensed under your choice of the Redis Source Available License 2.0
 * (RSALv2) or the Server Side Public License v1 (SSPLv1).
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 */

const char *ascii_logo =
"                _._                                                  \n"
"           _.-``__ ''-._                                             \n"
"      _.-``    `.  `_.  ''-._           Redis Community Edition      \n"
"      _.-``    `.  `_.  ''-._           Redis Open Source            \n"
"  .-`` .-```.  ```\\/    _.,_ ''-._     %s (%s/%d) %s bit\n"
" (    '      ,       .-`  | `,    )     Running in %s mode\n"
" |`-._`-...-` __...-.``-._|'` _.-'|     Port: %d\n"
@ -49,8 +49,9 @@
 * Copyright (c) 2015-Present, Redis Ltd.
 * All rights reserved.
 *
 * Licensed under your choice of the Redis Source Available License 2.0
 * (RSALv2) or the Server Side Public License v1 (SSPLv1).
 * Licensed under your choice of (a) the Redis Source Available License 2.0
 * (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
 * GNU Affero General Public License v3 (AGPLv3).
 */

#include <pthread.h>
@ -10,7 +10,7 @@
 *
 * In the future we'll either continue implementing new things we need or
 * we'll switch to libeio. However there are probably long term uses for this
 * file as we may want to put here Redis specific background tasks.
 * file as we may want to put Redis specific background tasks here.
 *
 * DESIGN
 * ------
|||
* Copyright (c) 2009-Present, Redis Ltd.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Licensed under your choice of the Redis Source Available License 2.0
|
||||
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
|
||||
* Licensed under your choice of (a) the Redis Source Available License 2.0
|
||||
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
|
||||
* GNU Affero General Public License v3 (AGPLv3).
|
||||
*/
|
||||
|
||||
#include "server.h"
|
||||
|
|
|
@ -2,8 +2,9 @@
|
|||
* Copyright (c) 2009-Present, Redis Ltd.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Licensed under your choice of the Redis Source Available License 2.0
|
||||
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
|
||||
* Licensed under your choice of (a) the Redis Source Available License 2.0
|
||||
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
|
||||
* GNU Affero General Public License v3 (AGPLv3).
|
||||
*/
|
||||
|
||||
#ifndef __BIO_H
|
||||
|
|
src/bitops.c
@ -3,8 +3,9 @@
|
|||
* Copyright (c) 2009-Present, Redis Ltd.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Licensed under your choice of the Redis Source Available License 2.0
|
||||
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
|
||||
* Licensed under your choice of (a) the Redis Source Available License 2.0
|
||||
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
|
||||
* GNU Affero General Public License v3 (AGPLv3).
|
||||
*/
|
||||
|
||||
#include "server.h"
|
||||
|
@ -493,16 +494,17 @@ int getBitfieldTypeFromArgument(client *c, robj *o, int *sign, int *bits) {
|
|||
*
|
||||
* (Must provide all the arguments to the function)
|
||||
*/
|
||||
static robj *lookupStringForBitCommand(client *c, uint64_t maxbit,
|
||||
static kvobj *lookupStringForBitCommand(client *c, uint64_t maxbit,
|
||||
size_t *strOldSize, size_t *strGrowSize)
|
||||
{
|
||||
dictEntryLink link;
|
||||
size_t byte = maxbit >> 3;
|
||||
robj *o = lookupKeyWrite(c->db,c->argv[1]);
|
||||
kvobj *o = lookupKeyWriteWithLink(c->db,c->argv[1],&link);
|
||||
if (checkType(c,o,OBJ_STRING)) return NULL;
|
||||
|
||||
if (o == NULL) {
|
||||
o = createObject(OBJ_STRING,sdsnewlen(NULL, byte+1));
|
||||
dbAdd(c->db,c->argv[1],o);
|
||||
dbAddByLink(c->db,c->argv[1],&o,&link);
|
||||
*strGrowSize = byte + 1;
|
||||
*strOldSize = 0;
|
||||
} else {
|
||||
|
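Note: the hunk above replaces a plain lookup followed by dbAdd with a link-based variant, where the write lookup also returns a dictEntryLink and dbAddByLink reuses that link so the insert does not hash and walk the bucket a second time. A toy sketch of the idea with a simplified chained hash table; names here are illustrative only, not the Redis dict API:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

/* A "link" is a pointer to the next-pointer that does (or would)
 * reference an entry: either &table[bucket] or &prev->next. Finding it
 * once lets a later insert or delete reuse the traversal. Toy code. */
typedef struct entry { const char *key; int val; struct entry *next; } entry;
typedef entry **link_t;

#define NBUCKETS 16
static unsigned hash(const char *k) {
    unsigned h = 5381; while (*k) h = h * 33 + (unsigned char)*k++;
    return h % NBUCKETS;
}

/* Returns the link where `key` lives or would live. */
static link_t find_link(entry **table, const char *key) {
    link_t ln = &table[hash(key)];
    while (*ln && strcmp((*ln)->key, key) != 0) ln = &(*ln)->next;
    return ln;
}

/* Insert at a previously obtained link: no second hash/scan needed. */
static void insert_at_link(link_t ln, const char *key, int val) {
    entry *e = malloc(sizeof(*e));
    e->key = key; e->val = val; e->next = *ln; *ln = e;
}

int main(void) {
    entry *table[NBUCKETS] = {0};
    link_t ln = find_link(table, "mykey");               /* one traversal... */
    if (*ln == NULL) insert_at_link(ln, "mykey", 42);    /* ...reused here */
    printf("%d\n", table[hash("mykey")]->val);
    return 0;
}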
@@ -547,7 +549,6 @@ unsigned char *getObjectReadOnlyString(robj *o, long *len, char *llbuf) {

/* SETBIT key offset bitvalue */
void setbitCommand(client *c) {
robj *o;
char *err = "bit is not an integer or out of range";
uint64_t bitoffset;
ssize_t byte, bit;

@@ -567,8 +568,8 @@ void setbitCommand(client *c) {
}

size_t strOldSize, strGrowSize;
if ((o = lookupStringForBitCommand(c,bitoffset,&strOldSize,&strGrowSize)) == NULL)
return;
kvobj *o = lookupStringForBitCommand(c, bitoffset, &strOldSize, &strGrowSize);
if (o == NULL) return;

/* Get current values */
byte = bitoffset >> 3;

@@ -602,7 +603,6 @@ void setbitCommand(client *c) {

/* GETBIT key offset */
void getbitCommand(client *c) {
robj *o;
char llbuf[32];
uint64_t bitoffset;
size_t byte, bit;

@@ -611,16 +611,16 @@ void getbitCommand(client *c) {
if (getBitOffsetFromArgument(c,c->argv[2],&bitoffset,0,0) != C_OK)
return;

if ((o = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
checkType(c,o,OBJ_STRING)) return;
kvobj *kv = lookupKeyReadOrReply(c, c->argv[1], shared.czero);
if (kv == NULL || checkType(c,kv,OBJ_STRING)) return;

byte = bitoffset >> 3;
bit = 7 - (bitoffset & 0x7);
if (sdsEncodedObject(o)) {
if (byte < sdslen(o->ptr))
bitval = ((uint8_t*)o->ptr)[byte] & (1 << bit);
if (sdsEncodedObject(kv)) {
if (byte < sdslen(kv->ptr))
bitval = ((uint8_t*)kv->ptr)[byte] & (1 << bit);
} else {
if (byte < (size_t)ll2string(llbuf,sizeof(llbuf),(long)o->ptr))
if (byte < (size_t)ll2string(llbuf,sizeof(llbuf),(long)kv->ptr))
bitval = llbuf[byte] & (1 << bit);
}

@@ -631,7 +631,7 @@ void getbitCommand(client *c) {
REDIS_NO_SANITIZE("alignment")
void bitopCommand(client *c) {
char *opname = c->argv[1]->ptr;
robj *o, *targetkey = c->argv[2];
robj *targetkey = c->argv[2];
unsigned long op, j, numkeys;
robj **objects; /* Array of source objects. */
unsigned char **src; /* Array of source strings pointers. */

@@ -666,9 +666,9 @@ void bitopCommand(client *c) {
len = zmalloc(sizeof(long) * numkeys);
objects = zmalloc(sizeof(robj*) * numkeys);
for (j = 0; j < numkeys; j++) {
o = lookupKeyRead(c->db,c->argv[j+3]);
kvobj *kv = lookupKeyRead(c->db, c->argv[j + 3]);
/* Handle non-existing keys as empty strings. */
if (o == NULL) {
if (kv == NULL) {
objects[j] = NULL;
src[j] = NULL;
len[j] = 0;

@@ -676,7 +676,7 @@ void bitopCommand(client *c) {
continue;
}
/* Return an error if one of the keys is not a string. */
if (checkType(c,o,OBJ_STRING)) {
if (checkType(c, kv, OBJ_STRING)) {
unsigned long i;
for (i = 0; i < j; i++) {
if (objects[i])

@@ -687,7 +687,7 @@ void bitopCommand(client *c) {
zfree(objects);
return;
}
objects[j] = getDecodedObject(o);
objects[j] = getDecodedObject(kv);
src[j] = objects[j]->ptr;
len[j] = sdslen(objects[j]->ptr);
if (len[j] > maxlen) maxlen = len[j];

@@ -804,10 +804,9 @@ void bitopCommand(client *c) {

/* Store the computed value into the target key */
if (maxlen) {
o = createObject(OBJ_STRING,res);
setKey(c,c->db,targetkey,o,0);
robj *o = createObject(OBJ_STRING, res);
setKey(c, c->db, targetkey, &o, 0);
notifyKeyspaceEvent(NOTIFY_STRING,"set",targetkey,c->db->id);
decrRefCount(o);
server.dirty++;
} else if (dbDelete(c->db,targetkey)) {
signalModifiedKey(c,c->db,targetkey);

@@ -819,7 +818,7 @@ void bitopCommand(client *c) {

/* BITCOUNT key [start end [BIT|BYTE]] */
void bitcountCommand(client *c) {
robj *o;
kvobj *o;
long long start, end;
long strlen;
unsigned char *p;

@@ -911,7 +910,7 @@ void bitcountCommand(client *c) {

/* BITPOS key bit [start [end [BIT|BYTE]]] */
void bitposCommand(client *c) {
robj *o;
kvobj *o;
long long start, end;
long bit, strlen;
unsigned char *p;

@@ -1076,7 +1075,7 @@ struct bitfieldOp {
* when flags is set to BITFIELD_FLAG_READONLY: in this case only the
* GET subcommand is allowed, other subcommands will return an error. */
void bitfieldGeneric(client *c, int flags) {
robj *o;
kvobj *o;
uint64_t bitoffset;
int j, numops = 0, changes = 0;
size_t strOldSize, strGrowSize = 0;
@@ -3,8 +3,14 @@
* Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*
* Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
*
* ---------------------------------------------------------------------------
*

@@ -87,6 +93,7 @@ void updateStatsOnUnblock(client *c, long blocked_us, long reply_us, int had_err
const ustime_t total_cmd_duration = c->duration + blocked_us + reply_us;
c->lastcmd->microseconds += total_cmd_duration;
c->lastcmd->calls++;
c->commands_processed++;
server.stat_numcommands++;
if (had_errors)
c->lastcmd->failed_calls++;

@@ -568,7 +575,7 @@ static void handleClientsBlockedOnKey(readyList *rl) {
long count = listLength(clients);
while ((ln = listNext(&li)) && count--) {
client *receiver = listNodeValue(ln);
robj *o = lookupKeyReadWithFlags(rl->db, rl->key, LOOKUP_NOEFFECTS);
kvobj *o = lookupKeyReadWithFlags(rl->db, rl->key, LOOKUP_NOEFFECTS);
/* 1. In case new key was added/touched we need to verify it satisfy the
* blocked type, since we might process the wrong key type.
* 2. We want to serve clients blocked on module keys
@@ -2,8 +2,9 @@
* Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#include "server.h"

@@ -2,8 +2,9 @@
* Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#ifndef SRC_CALL_REPLY_H_

@@ -2,8 +2,9 @@
* Copyright (c) 2016-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#include "server.h"

@@ -3,8 +3,9 @@
* Copyright (c) 2020-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#include "fmacros.h"
@@ -2,8 +2,9 @@
* Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*
* Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
*/

@@ -130,7 +131,7 @@ int verifyDumpPayload(unsigned char *p, size_t len, uint16_t *rdbver_ptr) {
* DUMP is actually not used by Redis Cluster but it is the obvious
* complement of RESTORE and can be useful for different applications. */
void dumpCommand(client *c) {
robj *o;
kvobj *o;
rio payload;

/* Check if the key is here. */

@@ -238,14 +239,14 @@ void restoreCommand(client *c) {
}

/* Create the key and set the TTL if any */
dictEntry *de = dbAdd(c->db,key,obj);
kvobj *kv = dbAdd(c->db, key, &obj);

/* If minExpiredField was set, then the object is hash with expiration
* on fields and need to register it in global HFE DS */
if (obj->type == OBJ_HASH) {
uint64_t minExpiredField = hashTypeGetMinExpire(obj, 1);
if (kv->type == OBJ_HASH) {
uint64_t minExpiredField = hashTypeGetMinExpire(kv, 1);
if (minExpiredField != EB_EXPIRE_TIME_INVALID)
hashTypeAddToExpires(c->db, dictGetKey(de), obj, minExpiredField);
hashTypeAddToExpires(c->db, kv, minExpiredField);
}

if (ttl) {

@@ -258,7 +259,7 @@ void restoreCommand(client *c) {
rewriteClientCommandArgument(c,c->argc,shared.absttl);
}
}
objectSetLRUOrLFU(obj,lfu_freq,lru_idle,lru_clock,1000);
objectSetLRUOrLFU(kv, lfu_freq, lru_idle, lru_clock, 1000);
signalModifiedKey(c,c->db,key);
notifyKeyspaceEvent(NOTIFY_GENERIC,"restore",key,c->db->id);
addReply(c,shared.ok);

@@ -387,8 +388,8 @@ void migrateCommand(client *c) {
char *password = NULL;
long timeout;
long dbid;
robj **ov = NULL; /* Objects to migrate. */
robj **kv = NULL; /* Key names. */
robj **kvArray = NULL; /* Objects to migrate. */
robj **keyArray = NULL; /* Key names. */
robj **newargv = NULL; /* Used to rewrite the command as DEL ... keys ... */
rio cmd, payload;
int may_retry = 1;

@@ -452,19 +453,19 @@ void migrateCommand(client *c) {
* the caller there was nothing to migrate. We don't return an error in
* this case, since often this is due to a normal condition like the key
* expiring in the meantime. */
ov = zrealloc(ov,sizeof(robj*)*num_keys);
kv = zrealloc(kv,sizeof(robj*)*num_keys);
int oi = 0;
kvArray = zrealloc(kvArray,sizeof(kvobj*)*num_keys);
keyArray = zrealloc(keyArray,sizeof(robj*)*num_keys);
int num_exists = 0;

for (j = 0; j < num_keys; j++) {
if ((ov[oi] = lookupKeyRead(c->db,c->argv[first_key+j])) != NULL) {
kv[oi] = c->argv[first_key+j];
oi++;
if ((kvArray[num_exists] = lookupKeyRead(c->db,c->argv[first_key+j])) != NULL) {
keyArray[num_exists] = c->argv[first_key+j];
num_exists++;
}
}
num_keys = oi;
num_keys = num_exists;
if (num_keys == 0) {
zfree(ov); zfree(kv);
zfree(kvArray); zfree(keyArray);
addReplySds(c,sdsnew("+NOKEY\r\n"));
return;
}

@@ -475,7 +476,7 @@ void migrateCommand(client *c) {
/* Connect */
cs = migrateGetSocket(c,c->argv[1],c->argv[2],timeout);
if (cs == NULL) {
zfree(ov); zfree(kv);
zfree(kvArray); zfree(keyArray);
return; /* error sent to the client by migrateGetSocket() */
}

@@ -510,7 +511,7 @@ void migrateCommand(client *c) {
/* Create RESTORE payload and generate the protocol to call the command. */
for (j = 0; j < num_keys; j++) {
long long ttl = 0;
long long expireat = getExpire(c->db,kv[j]);
long long expireat = kvobjGetExpire(kvArray[j]);

if (expireat != -1) {
ttl = expireat-commandTimeSnapshot();

@@ -523,8 +524,8 @@ void migrateCommand(client *c) {
/* Relocate valid (non expired) keys and values into the array in successive
* positions to remove holes created by the keys that were present
* in the first lookup but are now expired after the second lookup. */
ov[non_expired] = ov[j];
kv[non_expired++] = kv[j];
kvArray[non_expired] = kvArray[j];
keyArray[non_expired++] = keyArray[j];

serverAssertWithInfo(c,NULL,
rioWriteBulkCount(&cmd,'*',replace ? 5 : 4));

@@ -534,14 +535,14 @@ void migrateCommand(client *c) {
rioWriteBulkString(&cmd,"RESTORE-ASKING",14));
else
serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,"RESTORE",7));
serverAssertWithInfo(c,NULL,sdsEncodedObject(kv[j]));
serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,kv[j]->ptr,
sdslen(kv[j]->ptr)));
serverAssertWithInfo(c,NULL,sdsEncodedObject(keyArray[j]));
serverAssertWithInfo(c,NULL,rioWriteBulkString(&cmd,keyArray[j]->ptr,
sdslen(keyArray[j]->ptr)));
serverAssertWithInfo(c,NULL,rioWriteBulkLongLong(&cmd,ttl));

/* Emit the payload argument, that is the serialized object using
* the DUMP format. */
createDumpPayload(&payload,ov[j],kv[j],dbid);
createDumpPayload(&payload,kvArray[j],keyArray[j],dbid);
serverAssertWithInfo(c,NULL,
rioWriteBulkString(&cmd,payload.io.buffer.ptr,
sdslen(payload.io.buffer.ptr)));

@@ -621,14 +622,14 @@ void migrateCommand(client *c) {
} else {
if (!copy) {
/* No COPY option: remove the local key, signal the change. */
dbDelete(c->db,kv[j]);
signalModifiedKey(c,c->db,kv[j]);
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",kv[j],c->db->id);
dbDelete(c->db,keyArray[j]);
signalModifiedKey(c,c->db,keyArray[j]);
notifyKeyspaceEvent(NOTIFY_GENERIC,"del",keyArray[j],c->db->id);
server.dirty++;

/* Populate the argument vector to replace the old one. */
newargv[del_idx++] = kv[j];
incrRefCount(kv[j]);
newargv[del_idx++] = keyArray[j];
incrRefCount(keyArray[j]);
}
}
}

@@ -686,7 +687,7 @@ void migrateCommand(client *c) {
}

sdsfree(cmd.io.buffer.ptr);
zfree(ov); zfree(kv); zfree(newargv);
zfree(kvArray); zfree(keyArray); zfree(newargv);
return;

/* On socket errors we try to close the cached socket and try again.

@@ -713,7 +714,7 @@ void migrateCommand(client *c) {
}

/* Cleanup we want to do if no retry is attempted. */
zfree(ov); zfree(kv);
zfree(kvArray); zfree(keyArray);
addReplyErrorSds(c, sdscatprintf(sdsempty(),
"-IOERR error or timeout %s to target instance",
write_error ? "writing" : "reading"));

@@ -1010,7 +1011,7 @@ void clusterCommand(client *c) {
for (unsigned int i = 0; i < numkeys; i++) {
de = kvstoreDictIteratorNext(kvs_di);
serverAssert(de != NULL);
sds sdskey = dictGetKey(de);
sds sdskey = kvobjGetKey(dictGetKV(de));
addReplyBulkCBuffer(c, sdskey, sdslen(sdskey));
}
kvstoreReleaseDictIterator(kvs_di);
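Note: a recurring pattern in these hunks is that the stored value now carries its own key (dictGetKV(de) / kvobjGetKey(kv)) instead of the dict entry holding a separate sds key alongside the value. A toy illustration of a key-embedding object using a flexible array member; this layout is illustrative only, not the actual Redis kvobj:

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

/* Toy "kv object": value header and key bytes live in one allocation,
 * so a table slot needs only one pointer and the key is recoverable
 * from the object itself. Not the real Redis layout. */
typedef struct kv {
    int type;          /* value type tag */
    long long expire;  /* -1 if no TTL */
    size_t keylen;
    char key[];        /* flexible array member: key bytes follow */
} kv;

static kv *kv_new(const char *key, int type) {
    size_t klen = strlen(key);
    kv *o = malloc(sizeof(kv) + klen + 1);
    o->type = type; o->expire = -1; o->keylen = klen;
    memcpy(o->key, key, klen + 1);
    return o;
}

static const char *kv_get_key(const kv *o) { return o->key; }

int main(void) {
    kv *o = kv_new("user:1000", 0);
    printf("key=%s expire=%lld\n", kv_get_key(o), o->expire);
    free(o);
    return 0;
}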
@@ -107,9 +107,7 @@ clusterNode *getMyClusterNode(void);
char *getMyClusterId(void);
int getClusterSize(void);
int getMyShardSlotCount(void);
int handleDebugClusterCommand(client *c);
int clusterNodePending(clusterNode *node);
int clusterNodeIsMaster(clusterNode *n);
char **getClusterNodesList(size_t *numnodes);
int clusterNodeIsMaster(clusterNode *n);
char *clusterNodeIp(clusterNode *node);

@@ -5,8 +5,9 @@
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*
* Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
*/

@@ -2602,7 +2603,7 @@ uint32_t writePingExt(clusterMsg *hdr, int gossipcount) {
totlen += getShardIdPingExtSize();
extensions++;

/* Populate insternal secret */
/* Populate internal secret */
if (cursor != NULL) {
clusterMsgPingExtInternalSecret *ext = preparePingExt(cursor, CLUSTERMSG_EXT_TYPE_INTERNALSECRET, getInternalSecretPingExtSize());
memcpy(ext->internal_secret, server.cluster->internal_secret, CLUSTER_INTERNALSECRETLEN);

@@ -5792,7 +5793,7 @@ unsigned int delKeysInSlot(unsigned int hashslot) {
kvs_di = kvstoreGetDictSafeIterator(server.db->keys, hashslot);
while((de = kvstoreDictIteratorNext(kvs_di)) != NULL) {
enterExecutionUnit(1, 0);
sds sdskey = dictGetKey(de);
sds sdskey = kvobjGetKey(dictGetKV(de));
robj *key = createStringObject(sdskey, sdslen(sdskey));
dbDelete(&server.db[0], key);
propagateDeletion(&server.db[0], key, server.lazyfree_lazy_server_del);

@@ -5,8 +5,9 @@
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*
* Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
*/

@@ -231,7 +232,7 @@ typedef struct {
uint16_t ver; /* Protocol version, currently set to 1. */
uint16_t port; /* Primary port number (TCP or TLS). */
uint16_t type; /* Message type */
uint16_t count; /* Only used for some kind of messages. */
uint16_t count; /* Only used for some kinds of messages. */
uint64_t currentEpoch; /* The epoch accordingly to the sending node. */
uint64_t configEpoch; /* The config epoch if it's a master, or the last
epoch advertised by its master if it is a

@@ -258,8 +259,8 @@ typedef struct {
* especially during cluster rolling upgrades.
*
* Therefore, fields in this struct should remain at the same offset from
* release to release. The static asserts below ensures that incompatible
* changes in clusterMsg be caught at compile time.
* release to release. The static asserts below ensure that incompatible
* changes in clusterMsg are caught at compile time.
*/

static_assert(offsetof(clusterMsg, sig) == 0, "unexpected field offset");
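Note: the comment above describes pinning a wire-format struct by asserting field offsets at compile time, so a reordered or resized field breaks the build instead of silently changing the protocol. A minimal standalone example of the technique, using a hypothetical message struct rather than clusterMsg itself:

#include <assert.h>   /* static_assert (C11) */
#include <stddef.h>   /* offsetof */
#include <stdint.h>

/* Hypothetical on-wire header used only for this illustration. */
typedef struct wire_header {
    char     sig[4];
    uint16_t ver;
    uint16_t type;
    uint64_t epoch;
} wire_header;

static_assert(offsetof(wire_header, sig) == 0, "unexpected field offset");
static_assert(offsetof(wire_header, ver) == 4, "unexpected field offset");
static_assert(offsetof(wire_header, type) == 6, "unexpected field offset");
static_assert(offsetof(wire_header, epoch) == 8, "unexpected field offset");

int main(void) { return 0; }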
13 src/config.c

@@ -6,8 +6,9 @@
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*
* Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
*/

@@ -1578,6 +1579,9 @@ void rewriteConfigLoadmoduleOption(struct rewriteConfigState *state) {
dictEntry *de;
while ((de = dictNext(di)) != NULL) {
struct RedisModule *module = dictGetVal(de);
/* Internal modules doesn't have path and are not part of the configuration file */
if (sdslen(module->loadmod->path) == 0) continue;

line = sdsnew("loadmodule ");
line = sdscatsds(line, module->loadmod->path);
for (int i = 0; i < module->loadmod->argc; i++) {

@@ -2457,6 +2461,7 @@ static int updatePort(const char **err) {
listener->bindaddr = server.bindaddr;
listener->bindaddr_count = server.bindaddr_count;
listener->port = server.port;
clusterUpdateMyselfAnnouncedPorts();
listener->ct = connectionByType(CONN_TYPE_SOCKET);
if (changeListener(listener) == C_ERR) {
*err = "Unable to listen on this port. Check server logs.";

@@ -2684,6 +2689,7 @@ static int applyTLSPort(const char **err) {
listener->bindaddr_count = server.bindaddr_count;
listener->port = server.tls_port;
listener->ct = connectionByType(CONN_TYPE_TLS);
clusterUpdateMyselfAnnouncedPorts();
if (changeListener(listener) == C_ERR) {
*err = "Unable to listen on this port. Check server logs.";
return 0;

@@ -3063,7 +3069,6 @@ standardConfig static_configs[] = {
/* Bool configs */
createBoolConfig("rdbchecksum", NULL, IMMUTABLE_CONFIG, server.rdb_checksum, 1, NULL, NULL),
createBoolConfig("daemonize", NULL, IMMUTABLE_CONFIG, server.daemonize, 0, NULL, NULL),
createBoolConfig("io-threads-do-reads", NULL, DEBUG_CONFIG | IMMUTABLE_CONFIG, server.io_threads_do_reads, 0,NULL, NULL), /* Read + parse from threads? */
createBoolConfig("always-show-logo", NULL, IMMUTABLE_CONFIG, server.always_show_logo, 0, NULL, NULL),
createBoolConfig("protected-mode", NULL, MODIFIABLE_CONFIG, server.protected_mode, 1, NULL, NULL),
createBoolConfig("rdbcompression", NULL, MODIFIABLE_CONFIG, server.rdb_compression, 1, NULL, NULL),

@@ -3247,7 +3252,7 @@ standardConfig static_configs[] = {
/* Other configs */
createTimeTConfig("repl-backlog-ttl", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, server.repl_backlog_time_limit, 60*60, INTEGER_CONFIG, NULL, NULL), /* Default: 1 hour */
createOffTConfig("auto-aof-rewrite-min-size", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, server.aof_rewrite_min_size, 64*1024*1024, MEMORY_CONFIG, NULL, NULL),
createOffTConfig("loading-process-events-interval-bytes", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 1024, INT_MAX, server.loading_process_events_interval_bytes, 1024*1024*2, INTEGER_CONFIG, NULL, NULL),
createOffTConfig("loading-process-events-interval-bytes", NULL, MODIFIABLE_CONFIG | HIDDEN_CONFIG, 1024, INT_MAX, server.loading_process_events_interval_bytes, 1024*512, INTEGER_CONFIG, NULL, NULL),

createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, server.tls_port, 0, INTEGER_CONFIG, NULL, applyTLSPort), /* TCP port. */
createIntConfig("tls-session-cache-size", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, server.tls_ctx_config.session_cache_size, 20*1024, INTEGER_CONFIG, NULL, applyTlsCfg),

11 src/config.h

@@ -2,8 +2,9 @@
* Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#ifndef __CONFIG_H

@@ -167,6 +168,12 @@
#define REDIS_NO_SANITIZE(sanitizer)
#endif

#if defined(__clang__)
#define REDIS_NO_SANITIZE_MSAN(sanitizer) REDIS_NO_SANITIZE(sanitizer)
#else
#define REDIS_NO_SANITIZE_MSAN(sanitizer)
#endif

/* Define rdb_fsync_range to sync_file_range() on Linux, otherwise we use
* the plain fsync() call. */
#if (defined(__linux__) && defined(SYNC_FILE_RANGE_WAIT_BEFORE))
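Note: the new REDIS_NO_SANITIZE_MSAN wrapper expands to a real no_sanitize attribute only under clang, because MemorySanitizer is clang-only. A self-contained sketch of how such a macro is typically defined and applied; this is a simplified stand-in, not the actual definition of REDIS_NO_SANITIZE elsewhere in config.h:

#include <stdio.h>

/* Wrap the attribute so code can annotate functions unconditionally;
 * compilers without the attribute get a no-op. */
#if defined(__clang__) && defined(__has_attribute)
#if __has_attribute(no_sanitize)
#define NO_SANITIZE(what) __attribute__((no_sanitize(what)))
#endif
#endif
#ifndef NO_SANITIZE
#define NO_SANITIZE(what)
#endif

/* The annotation tells the sanitizer not to instrument this function,
 * useful where a function intentionally touches memory MSan would flag. */
NO_SANITIZE("memory")
static int peek(const int *p) { return *p; }

int main(void) {
    int x = 7;
    printf("%d\n", peek(&x));
    return 0;
}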
@@ -3,8 +3,9 @@
* Copyright (c) 2019-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#ifndef __REDIS_CONNECTION_H

@@ -3,8 +3,9 @@
* Copyright (c) 2019-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#ifndef __REDIS_CONNHELPERS_H
89 src/debug.c

@@ -5,8 +5,9 @@
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*
* Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
*/

@@ -130,7 +131,7 @@ void mixStringObjectDigest(unsigned char *digest, robj *o) {
void xorObjectDigest(redisDb *db, robj *keyobj, unsigned char *digest, robj *o) {
uint32_t aux = htonl(o->type);
mixDigest(digest,&aux,sizeof(aux));
long long expiretime = getExpire(db,keyobj);
long long expiretime = getExpire(db, keyobj->ptr, NULL);
char buf[128];

/* Save the key and associated value */

@@ -289,17 +290,16 @@ void computeDatasetDigest(unsigned char *final) {

/* Iterate this DB writing every entry */
while((de = kvstoreIteratorNext(kvs_it)) != NULL) {
sds key;
robj *keyobj, *o;
robj *keyobj;

memset(digest,0,20); /* This key-val digest */
key = dictGetKey(de);
kvobj *kv = dictGetKV(de);
sds key = kvobjGetKey(kv);
keyobj = createStringObject(key,sdslen(key));

mixDigest(digest,key,sdslen(key));

o = dictGetVal(de);
xorObjectDigest(db,keyobj,digest,o);
xorObjectDigest(db, keyobj, digest, kv);

/* We can finally xor the key-val digest to the final digest */
xorDigest(final,digest,20);

@@ -604,22 +604,21 @@ NULL
server.cluster_drop_packet_filter = packet_type;
addReply(c,shared.ok);
} else if (!strcasecmp(c->argv[1]->ptr,"object") && c->argc == 3) {
dictEntry *de;
robj *val;
kvobj *kv;
char *strenc;

if ((de = dbFind(c->db, c->argv[2]->ptr)) == NULL) {
if ((kv = dbFind(c->db, c->argv[2]->ptr)) == NULL) {
addReplyErrorObject(c,shared.nokeyerr);
return;
}
val = dictGetVal(de);
strenc = strEncoding(val->encoding);

strenc = strEncoding(kv->encoding);

char extra[138] = {0};
if (val->encoding == OBJ_ENCODING_QUICKLIST) {
if (kv->encoding == OBJ_ENCODING_QUICKLIST) {
char *nextra = extra;
int remaining = sizeof(extra);
quicklist *ql = val->ptr;
quicklist *ql = kv->ptr;
/* Add number of quicklist nodes */
int used = snprintf(nextra, remaining, " ql_nodes:%lu", ql->len);
nextra += used;

@@ -652,38 +651,42 @@ NULL
"Value at:%p refcount:%d "
"encoding:%s serializedlength:%zu "
"lru:%d lru_seconds_idle:%llu%s",
(void*)val, val->refcount,
strenc, rdbSavedObjectLen(val, c->argv[2], c->db->id),
val->lru, estimateObjectIdleTime(val)/1000, extra);
(void*)kv, kv->refcount,
strenc, rdbSavedObjectLen(kv, c->argv[2], c->db->id),
kv->lru, estimateObjectIdleTime(kv)/1000, extra);
} else if (!strcasecmp(c->argv[1]->ptr,"sdslen") && c->argc == 3) {
dictEntry *de;
robj *val;
sds key;
kvobj *kv;

if ((de = dbFind(c->db, c->argv[2]->ptr)) == NULL) {
if ((kv = dbFind(c->db, c->argv[2]->ptr)) == NULL) {
addReplyErrorObject(c,shared.nokeyerr);
return;
}
val = dictGetVal(de);
key = dictGetKey(de);

if (val->type != OBJ_STRING || !sdsEncodedObject(val)) {

val = kv;
key = kvobjGetKey(kv);
if (kv->type != OBJ_STRING || !sdsEncodedObject(val)) {
addReplyError(c,"Not an sds encoded string.");
} else {
/* The key's allocation size reflects the entire robj allocation.
* For embedded values, report an allocation size of 0. */
size_t obj_alloc = zmalloc_usable_size(val);
size_t val_alloc = val->encoding == OBJ_ENCODING_RAW ? sdsAllocSize(val->ptr) : 0;
addReplyStatusFormat(c,
"key_sds_len:%lld, key_sds_avail:%lld, key_zmalloc: %lld, "
"val_sds_len:%lld, val_sds_avail:%lld, val_zmalloc: %lld",
(long long) sdslen(key),
(long long) sdsavail(key),
(long long) sdsZmallocSize(key),
(long long) obj_alloc,
(long long) sdslen(val->ptr),
(long long) sdsavail(val->ptr),
(long long) getStringObjectSdsUsedMemory(val));
(long long) val_alloc);
}
} else if (!strcasecmp(c->argv[1]->ptr,"listpack") && c->argc == 3) {
robj *o;
kvobj *o;

if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nokeyerr))
if ((o = kvobjCommandLookupOrReply(c, c->argv[2], shared.nokeyerr))
== NULL) return;

if (o->encoding != OBJ_ENCODING_LISTPACK && o->encoding != OBJ_ENCODING_LISTPACK_EX) {

@@ -697,9 +700,9 @@ NULL
addReplyStatus(c,"Listpack structure printed on stdout");
}
} else if (!strcasecmp(c->argv[1]->ptr,"quicklist") && (c->argc == 3 || c->argc == 4)) {
robj *o;
kvobj *o;

if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nokeyerr))
if ((o = kvobjCommandLookupOrReply(c, c->argv[2], shared.nokeyerr))
== NULL) return;

int full = 0;

@@ -749,7 +752,7 @@ NULL
val = createStringObject(NULL,valsize);
memcpy(val->ptr, buf, valsize<=buflen? valsize: buflen);
}
dbAdd(c->db,key,val);
dbAdd(c->db, key, &val);
signalModifiedKey(c,c->db,key);
decrRefCount(key);
}

@@ -781,8 +784,7 @@ NULL

/* We don't use lookupKey because a debug command should
* work on logically expired keys */
dictEntry *de;
robj *o = ((de = dbFind(c->db, c->argv[j]->ptr)) == NULL) ? NULL : dictGetVal(de);
kvobj *o = dbFind(c->db, c->argv[j]->ptr);
if (o) xorObjectDigest(c->db,c->argv[j],digest,o);

sds d = sdsempty();

@@ -900,7 +902,7 @@ NULL
sds sizes = sdsempty();
sizes = sdscatprintf(sizes,"bits:%d ",(sizeof(void*) == 8)?64:32);
sizes = sdscatprintf(sizes,"robj:%d ",(int)sizeof(robj));
sizes = sdscatprintf(sizes,"dictentry:%d ",(int)dictEntryMemUsage());
sizes = sdscatprintf(sizes,"dictentry:%d ",(int)dictEntryMemUsage(0));
sizes = sdscatprintf(sizes,"sdshdr5:%d ",(int)sizeof(struct sdshdr5));
sizes = sdscatprintf(sizes,"sdshdr8:%d ",(int)sizeof(struct sdshdr8));
sizes = sdscatprintf(sizes,"sdshdr16:%d ",(int)sizeof(struct sdshdr16));

@@ -936,14 +938,14 @@ NULL
addReplyVerbatim(c,stats,sdslen(stats),"txt");
sdsfree(stats);
} else if (!strcasecmp(c->argv[1]->ptr,"htstats-key") && c->argc >= 3) {
robj *o;
kvobj *o;
dict *ht = NULL;
int full = 0;

if (c->argc >= 4 && !strcasecmp(c->argv[3]->ptr,"full"))
full = 1;

if ((o = objectCommandLookupOrReply(c,c->argv[2],shared.nokeyerr))
if ((o = kvobjCommandLookupOrReply(c,c->argv[2],shared.nokeyerr))
== NULL) return;

/* Get the hash table reference from the object, if possible. */

@@ -1377,6 +1379,7 @@ static void* getAndSetMcontextEip(ucontext_t *uc, void *eip) {
#undef NOT_SUPPORTED
}

REDIS_NO_SANITIZE_MSAN("memory")
REDIS_NO_SANITIZE("address")
void logStackContent(void **sp) {
if (server.hide_user_data_from_log) {

@@ -2188,15 +2191,11 @@ void logCurrentClient(client *cc, const char *title) {
/* Check if the first argument, usually a key, is found inside the
* selected DB, and if so print info about the associated object. */
if (cc->argc > 1) {
robj *val, *key;
dictEntry *de;

key = getDecodedObject(cc->argv[1]);
de = dbFind(cc->db, key->ptr);
if (de) {
val = dictGetVal(de);
robj *key = getDecodedObject(cc->argv[1]);
kvobj *kv = dbFind(cc->db, key->ptr);
if (kv) {
serverLog(LL_WARNING,"key '%s' found in DB containing the following object:", (char*)key->ptr);
serverLogObjectDebugInfo(val);
serverLogObjectDebugInfo(kv);
}
decrRefCount(key);
}

@@ -2728,7 +2727,7 @@ static size_t get_ready_to_signal_threads_tids(int sig_num, pid_t tids[TIDS_MAX_
pid_t calling_tid = syscall(SYS_gettid);
int current_thread_index = -1;
long nread;
char buff[PATH_MAX];
char buff[PATH_MAX] = {0};

/* readdir() is not async-signal-safe (AS-safe).
Hence, we read the file using SYS_getdents64, which is considered AS-sync*/
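Note: the comment above explains that readdir() is not async-signal-safe, so the thread list is read with the raw SYS_getdents64 syscall instead. A minimal Linux-only sketch of that technique, run from main() for simplicity (printf itself is not async-signal-safe; a real signal-handler path would avoid it). The struct layout follows the getdents64(2) man page, since glibc does not export it:

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

/* Layout documented in getdents64(2). */
struct linux_dirent64 {
    unsigned long long d_ino;
    long long          d_off;
    unsigned short     d_reclen;
    unsigned char      d_type;
    char               d_name[];
};

int main(void) {
    char buf[4096];
    /* List /proc/self/task using only raw syscalls, which is what an
     * async-signal-safe code path requires. */
    int fd = open("/proc/self/task", O_RDONLY | O_DIRECTORY);
    if (fd == -1) return 1;
    long nread;
    while ((nread = syscall(SYS_getdents64, fd, buf, sizeof(buf))) > 0) {
        for (long off = 0; off < nread;) {
            struct linux_dirent64 *d = (struct linux_dirent64 *)(buf + off);
            printf("tid entry: %s\n", d->d_name);
            off += d->d_reclen;
        }
    }
    close(fd);
    return 0;
}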
@@ -5,8 +5,9 @@
* Copyright (c) 2016-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#ifndef _REDIS_DEBUGMACRO_H_
186 src/defrag.c

@@ -11,8 +11,9 @@
* Copyright (c) 2024-present, Valkey contributors.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*
* Portions of this file are available under BSD3 terms; see REDISCONTRIBUTIONS for more information.
*/

@@ -218,12 +219,12 @@ void *activeDefragHfieldAndUpdateRef(void *ptr, void *privdata) {
/* We can't search in dict for that key after we've released
* the pointer it holds, since it won't be able to do the string
* compare, but we can find the entry using key hash and pointer. */
dictEntryLink link;
dictUseStoredKeyApi(d, 1);
uint64_t hash = dictGetHash(d, newhf);
link = dictFindLink(d, newhf, NULL);
dictUseStoredKeyApi(d, 0);
dictEntry *de = dictFindByHashAndPtr(d, ptr, hash);
serverAssert(de);
dictSetKey(d, de, newhf);
serverAssert(link);
dictSetKeyAtLink(d, newhf, &link, 0);
}

return newhf;

@@ -404,14 +405,16 @@ void activeDefragZsetEntry(zset *zs, dictEntry *de) {
#define DEFRAG_SDS_DICT_VAL_VOID_PTR 3
#define DEFRAG_SDS_DICT_VAL_LUA_SCRIPT 4

void activeDefragSdsDictCallback(void *privdata, const dictEntry *de) {
void activeDefragSdsDictCallback(void *privdata, const dictEntry *de, dictEntryLink plink) {
UNUSED(plink);
UNUSED(privdata);
UNUSED(de);
}

void activeDefragHfieldDictCallback(void *privdata, const dictEntry *de) {
void activeDefragHfieldDictCallback(void *privdata, const dictEntry *de, dictEntryLink plink) {
UNUSED(plink);
dict *d = privdata;
hfield newhf, hf = dictGetKey(de);
hfield newhf = NULL, hf = dictGetKey(de);

if (hfieldGetExpireTime(hf) == EB_EXPIRE_TIME_INVALID) {
/* If the hfield does not have TTL, we directly defrag it. */

@@ -421,6 +424,8 @@ void activeDefragHfieldDictCallback(void *privdata, const dictEntry *de) {
/* Skip fields with TTL here, they will be defragmented later during
* the hash expiry ebuckets defragmentation phase. */
}

if (newhf) dictSetKey(d, (dictEntry *) de, newhf);
}

/* Defrag a dict with sds key and optional value (either ptr, sds or robj string) */

@@ -498,13 +503,13 @@ void activeDefragQuickListNodes(quicklist *ql) {
/* when the value has lots of elements, we want to handle it later and not as
* part of the main dictionary scan. this is needed in order to prevent latency
* spikes when handling large items */
void defragLater(defragKeysCtx *ctx, dictEntry *kde) {
void defragLater(defragKeysCtx *ctx, kvobj *kv) {
if (!ctx->defrag_later) {
ctx->defrag_later = listCreate();
listSetFreeMethod(ctx->defrag_later, sdsfreegeneric);
ctx->defrag_later_cursor = 0;
}
sds key = sdsdup(dictGetKey(kde));
sds key = sdsdup(kvobjGetKey(kv));
listAddNodeTail(ctx->defrag_later, key);
}
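Note: defragLater records only a duplicated key name and leaves the heavy object for a later, cursor-driven pass, so one huge hash or list cannot stall the main scan. A toy sketch of the defer-big-items idea; the fixed-size queue and threshold are simplifications, not the Redis implementation:

#include <stdio.h>

/* Toy model: items above a size threshold are queued for a later pass
 * instead of being processed inline during the main scan. */
#define MAX_INLINE_FIELDS 1000
#define QCAP 128

static const char *later_queue[QCAP];
static int later_count = 0;

static void process_inline(const char *key) { printf("inline: %s\n", key); }
static void defer(const char *key) {
    if (later_count < QCAP) later_queue[later_count++] = key;
}

static void scan_item(const char *key, long nfields) {
    if (nfields > MAX_INLINE_FIELDS)
        defer(key);           /* big item: handle incrementally later */
    else
        process_inline(key);  /* small item: cheap to do right now */
}

int main(void) {
    scan_item("small-hash", 10);
    scan_item("huge-hash", 50000);
    for (int i = 0; i < later_count; i++)
        printf("deferred for later pass: %s\n", later_queue[i]);
    return 0;
}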
@@ -555,7 +560,8 @@ typedef struct {
zset *zs;
} scanLaterZsetData;

void scanLaterZsetCallback(void *privdata, const dictEntry *_de) {
void scanLaterZsetCallback(void *privdata, const dictEntry *_de, dictEntryLink plink) {
UNUSED(plink);
dictEntry *de = (dictEntry*)_de;
scanLaterZsetData *data = privdata;
activeDefragZsetEntry(data->zs, de);

@@ -572,7 +578,8 @@ void scanLaterZset(robj *ob, unsigned long *cursor) {
}

/* Used as scan callback when all the work is done in the dictDefragFunctions. */
void scanCallbackCountScanned(void *privdata, const dictEntry *de) {
void scanCallbackCountScanned(void *privdata, const dictEntry *de, dictEntryLink plink) {
UNUSED(plink);
UNUSED(privdata);
UNUSED(de);
server.stat_active_defrag_scanned++;

@@ -633,20 +640,18 @@ void scanLaterHash(robj *ob, unsigned long *cursor) {
}
}

void defragQuicklist(defragKeysCtx *ctx, dictEntry *kde) {
robj *ob = dictGetVal(kde);
quicklist *ql = ob->ptr, *newql;
serverAssert(ob->type == OBJ_LIST && ob->encoding == OBJ_ENCODING_QUICKLIST);
void defragQuicklist(defragKeysCtx *ctx, kvobj *kv) {
quicklist *ql = kv->ptr, *newql;
serverAssert(kv->type == OBJ_LIST && kv->encoding == OBJ_ENCODING_QUICKLIST);
if ((newql = activeDefragAlloc(ql)))
ob->ptr = ql = newql;
kv->ptr = ql = newql;
if (ql->len > server.active_defrag_max_scan_fields)
defragLater(ctx, kde);
defragLater(ctx, kv);
else
activeDefragQuickListNodes(ql);
}

void defragZsetSkiplist(defragKeysCtx *ctx, dictEntry *kde) {
robj *ob = dictGetVal(kde);
void defragZsetSkiplist(defragKeysCtx *ctx, kvobj *ob) {
zset *zs = (zset*)ob->ptr;
zset *newzs;
zskiplist *newzsl;

@@ -661,7 +666,7 @@ void defragZsetSkiplist(defragKeysCtx *ctx, dictEntry *kde) {
if ((newheader = activeDefragAlloc(zs->zsl->header)))
zs->zsl->header = newheader;
if (dictSize(zs->dict) > server.active_defrag_max_scan_fields)
defragLater(ctx, kde);
defragLater(ctx, ob);
else {
dictIterator *di = dictGetIterator(zs->dict);
while((de = dictNext(di)) != NULL) {

@@ -674,13 +679,12 @@ void defragZsetSkiplist(defragKeysCtx *ctx, dictEntry *kde) {
zs->dict = newdict;
}

void defragHash(defragKeysCtx *ctx, dictEntry *kde) {
robj *ob = dictGetVal(kde);
void defragHash(defragKeysCtx *ctx, kvobj *ob) {
dict *d, *newd;
serverAssert(ob->type == OBJ_HASH && ob->encoding == OBJ_ENCODING_HT);
d = ob->ptr;
if (dictSize(d) > server.active_defrag_max_scan_fields)
defragLater(ctx, kde);
defragLater(ctx, ob);
else
activeDefragHfieldDict(d);
/* defrag the dict struct and tables */

@@ -688,13 +692,12 @@ void defragHash(defragKeysCtx *ctx, dictEntry *kde) {
ob->ptr = newd;
}

void defragSet(defragKeysCtx *ctx, dictEntry *kde) {
robj *ob = dictGetVal(kde);
void defragSet(defragKeysCtx *ctx, kvobj *ob) {
dict *d, *newd;
serverAssert(ob->type == OBJ_SET && ob->encoding == OBJ_ENCODING_HT);
d = ob->ptr;
if (dictSize(d) > server.active_defrag_max_scan_fields)
defragLater(ctx, kde);
defragLater(ctx, ob);
else
activeDefragSdsDict(d, DEFRAG_SDS_DICT_NO_VAL);
/* defrag the dict struct and tables */

@@ -849,8 +852,7 @@ void* defragStreamConsumerGroup(raxIterator *ri, void *privdata) {
return NULL;
}

void defragStream(defragKeysCtx *ctx, dictEntry *kde) {
robj *ob = dictGetVal(kde);
void defragStream(defragKeysCtx *ctx, kvobj *ob) {
serverAssert(ob->type == OBJ_STREAM && ob->encoding == OBJ_ENCODING_STREAM);
stream *s = ob->ptr, *news;

@@ -862,7 +864,7 @@ void defragStream(defragKeysCtx *ctx, dictEntry *kde) {
rax *newrax = activeDefragAlloc(s->rax);
if (newrax)
s->rax = newrax;
defragLater(ctx, kde);
defragLater(ctx, ob);
} else
defragRadixTree(&s->rax, 1, NULL, NULL);

@@ -873,49 +875,43 @@ void defragStream(defragKeysCtx *ctx, dictEntry *kde) {
/* Defrag a module key. This is either done immediately or scheduled
* for later. Returns then number of pointers defragged.
*/
void defragModule(defragKeysCtx *ctx, redisDb *db, dictEntry *kde) {
robj *obj = dictGetVal(kde);
serverAssert(obj->type == OBJ_MODULE);
void defragModule(defragKeysCtx *ctx, redisDb *db, kvobj *kv) {
serverAssert(kv->type == OBJ_MODULE);
robj keyobj;
initStaticStringObject(keyobj, dictGetKey(kde));
if (!moduleDefragValue(&keyobj, obj, db->id))
defragLater(ctx, kde);
initStaticStringObject(keyobj, kvobjGetKey(kv));
if (!moduleDefragValue(&keyobj, kv, db->id))
defragLater(ctx, kv);
}

/* for each key we scan in the main dict, this function will attempt to defrag
* all the various pointers it has. */
void defragKey(defragKeysCtx *ctx, dictEntry *de) {
void defragKey(defragKeysCtx *ctx, dictEntry *de, dictEntryLink link) {
UNUSED(link);
dictEntryLink exlink = NULL;
kvobj *kvnew, *ob = dictGetKV(de);
redisDb *db = &server.db[ctx->dbid];
int slot = ctx->kvstate.slot;
sds keysds = dictGetKey(de);
robj *newob, *ob = dictGetVal(de);
unsigned char *newzl;
sds newsds;

/* Try to defrag the key name. */
newsds = activeDefragSds(keysds);
if (newsds) {
kvstoreDictSetKey(db->keys, slot, de, newsds);
if (kvstoreDictSize(db->expires, slot)) {
/* We can't search in db->expires for that key after we've released
* the pointer it holds, since it won't be able to do the string
* compare, but we can find the entry using key hash and pointer. */
uint64_t hash = kvstoreGetHash(db->expires, newsds);
dictEntry *expire_de = kvstoreDictFindByHashAndPtr(db->expires, slot, keysds, hash);
if (expire_de) kvstoreDictSetKey(db->expires, slot, expire_de, newsds);
}

/* Update the key's reference in the dict's metadata or the listpackEx. */
if (unlikely(ob->type == OBJ_HASH))
hashTypeUpdateKeyRef(ob, newsds);
}

long long expire = kvobjGetExpire(ob);
/* We can't search in db->expires for that KV after we've released
* the pointer it holds, since it won't be able to do the string
* compare. Search it before, if needed. */
if (expire != -1) {
exlink = kvstoreDictFindLink(db->expires, slot, kvobjGetKey(ob), NULL);
serverAssert(exlink != NULL);
}

/* Try to defrag robj and/or string value. For hash objects with HFEs,
* defer defragmentation until processing db's hexpires. */
if (!(ob->type == OBJ_HASH && hashTypeGetMinExpire(ob, 0) != EB_EXPIRE_TIME_INVALID)) {
if ((newob = activeDefragStringOb(ob))) {
kvstoreDictSetVal(db->keys, slot, de, newob);
ob = newob;
/* If the dict doesn't have metadata, we directly defrag it. */
kvnew = activeDefragStringOb(ob);
if (kvnew) {
kvstoreDictSetAtLink(db->keys, slot, kvnew, &link, 0);
if (expire != -1)
kvstoreDictSetAtLink(db->expires, slot, kvnew, &exlink, 0);
ob = kvnew;
}
}

@@ -923,7 +919,7 @@ void defragKey(defragKeysCtx *ctx, dictEntry *de) {
/* Already handled in activeDefragStringOb. */
} else if (ob->type == OBJ_LIST) {
if (ob->encoding == OBJ_ENCODING_QUICKLIST) {
defragQuicklist(ctx, de);
defragQuicklist(ctx, ob);
} else if (ob->encoding == OBJ_ENCODING_LISTPACK) {
if ((newzl = activeDefragAlloc(ob->ptr)))
ob->ptr = newzl;

@@ -932,7 +928,7 @@ void defragKey(defragKeysCtx *ctx, dictEntry *de) {
}
} else if (ob->type == OBJ_SET) {
if (ob->encoding == OBJ_ENCODING_HT) {
defragSet(ctx, de);
defragSet(ctx, ob);
} else if (ob->encoding == OBJ_ENCODING_INTSET ||
ob->encoding == OBJ_ENCODING_LISTPACK)
{

@@ -947,7 +943,7 @@ void defragKey(defragKeysCtx *ctx, dictEntry *de) {
if ((newzl = activeDefragAlloc(ob->ptr)))
ob->ptr = newzl;
} else if (ob->encoding == OBJ_ENCODING_SKIPLIST) {
defragZsetSkiplist(ctx, de);
defragZsetSkiplist(ctx, ob);
} else {
serverPanic("Unknown sorted set encoding");
}

@@ -962,23 +958,23 @@ void defragKey(defragKeysCtx *ctx, dictEntry *de) {
if ((newzl = activeDefragAlloc(lpt->lp)))
lpt->lp = newzl;
} else if (ob->encoding == OBJ_ENCODING_HT) {
defragHash(ctx, de);
defragHash(ctx, ob);
} else {
serverPanic("Unknown hash encoding");
}
} else if (ob->type == OBJ_STREAM) {
defragStream(ctx, de);
defragStream(ctx, ob);
} else if (ob->type == OBJ_MODULE) {
defragModule(ctx,db, de);
defragModule(ctx,db, ob);
} else {
serverPanic("Unknown object type");
}
}

/* Defrag scan callback for the main db dictionary. */
static void dbKeysScanCallback(void *privdata, const dictEntry *de) {
static void dbKeysScanCallback(void *privdata, const dictEntry *de, dictEntryLink plink) {
long long hits_before = server.stat_active_defrag_hits;
defragKey((defragKeysCtx *)privdata, (dictEntry *)de);
defragKey((defragKeysCtx *)privdata, (dictEntry *)de, plink);
if (server.stat_active_defrag_hits != hits_before)
server.stat_active_defrag_key_hits++;
else

@@ -1021,7 +1017,8 @@ float getAllocatorFragmentation(size_t *out_frag_bytes) {
}

/* Defrag scan callback for the pubsub dictionary. */
void defragPubsubScanCallback(void *privdata, const dictEntry *de) {
void defragPubsubScanCallback(void *privdata, const dictEntry *de, dictEntryLink plink) {
UNUSED(plink);
defragPubSubCtx *ctx = privdata;
kvstore *pubsub_channels = ctx->kvstate.kvs;
robj *newchannel, *channel = dictGetKey(de);

@@ -1057,9 +1054,8 @@ void defragPubsubScanCallback(void *privdata, const dictEntry *de) {

/* returns 0 more work may or may not be needed (see non-zero cursor),
* and 1 if time is up and more work is needed. */
int defragLaterItem(dictEntry *de, unsigned long *cursor, monotime endtime, int dbid) {
if (de) {
robj *ob = dictGetVal(de);
int defragLaterItem(kvobj *ob, unsigned long *cursor, monotime endtime, int dbid) {
if (ob) {
if (ob->type == OBJ_LIST && ob->encoding == OBJ_ENCODING_QUICKLIST) {
return scanLaterList(ob, cursor, endtime);
} else if (ob->type == OBJ_SET && ob->encoding == OBJ_ENCODING_HT) {

@@ -1072,7 +1068,7 @@ int defragLaterItem(dictEntry *de, unsigned long *cursor, monotime endtime, int
return scanLaterStreamListpacks(ob, cursor, endtime);
} else if (ob->type == OBJ_MODULE) {
robj keyobj;
initStaticStringObject(keyobj, dictGetKey(de));
initStaticStringObject(keyobj, kvobjGetKey(ob));
return moduleLateDefrag(&keyobj, ob, cursor, endtime, dbid);
} else {
*cursor = 0; /* object type/encoding may have changed since we schedule it for later */
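Note: defragLaterItem follows a cursor-plus-deadline contract: return 0 when this slice is done (a non-zero cursor says more remains) and 1 when the time budget ran out mid-item. A generic sketch of that incremental-work loop, using clock_gettime rather than Redis' monotime helpers; the batch size and check interval are illustrative:

#include <stdio.h>
#include <time.h>

static long long now_us(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (long long)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
}

/* Process elements starting at *cursor; stop early if endtime passes.
 * Returns 1 if interrupted by the deadline, 0 if the item completed. */
static int work_step(unsigned long *cursor, unsigned long total,
                     long long endtime_us) {
    while (*cursor < total) {
        /* ... defragment element *cursor here ... */
        (*cursor)++;
        /* Check the clock only every 64 elements to keep it cheap. */
        if ((*cursor & 63) == 0 && now_us() > endtime_us) return 1;
    }
    *cursor = 0; /* done: reset so the caller moves to the next item */
    return 0;
}

int main(void) {
    unsigned long cursor = 0;
    while (work_step(&cursor, 1000000, now_us() + 500))
        puts("time slice used up, resuming later");
    puts("item fully processed");
    return 0;
}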
@@ -1099,9 +1095,10 @@ static doneStatus defragLaterStep(void *ctx, monotime endtime) {
listNode *head = listFirst(defrag_keys_ctx->defrag_later);
sds key = head->value;
dictEntry *de = kvstoreDictFind(defrag_keys_ctx->kvstate.kvs, defrag_keys_ctx->kvstate.slot, key);
kvobj *kv = de ? dictGetKV(de) : NULL;

long long key_defragged = server.stat_active_defrag_hits;
int timeout = (defragLaterItem(de, &defrag_keys_ctx->defrag_later_cursor, endtime, defrag_keys_ctx->dbid) == 1);
int timeout = (defragLaterItem(kv, &defrag_keys_ctx->defrag_later_cursor, endtime, defrag_keys_ctx->dbid) == 1);
if (key_defragged != server.stat_active_defrag_hits) {
server.stat_active_defrag_key_hits++;
} else {

@@ -1264,29 +1261,32 @@ static doneStatus defragStageExpiresKvstore(void *ctx, monotime endtime) {

/* Defragment hash object with HFE and update its reference in the DB keys. */
void *activeDefragHExpiresOB(void *ptr, void *privdata) {
robj *ob = ptr;
redisDb *db = privdata;
serverAssert(ob->type == OBJ_HASH);
dictEntryLink link, exlink = NULL;
int slot;
kvobj *kvobj = ptr;
sds keystr = kvobjGetKey(kvobj);
serverAssert(kvobj->type == OBJ_HASH);

if ((ob = activeDefragAlloc(ob))) {
/* Retrieve the associated key string from the hash object. */
sds keystr;
if (ob->encoding == OBJ_ENCODING_LISTPACK_EX) {
keystr = ((listpackEx*)ob->ptr)->key;
} else {
serverAssert(ob->encoding == OBJ_ENCODING_HT);
dict *d = ob->ptr;
dictExpireMetadata *dictExpireMeta = (dictExpireMetadata *) dictMetadata(d);
keystr = dictExpireMeta->key;
}
long long expire = kvobjGetExpire(kvobj);
/* We can't search in db->expires for that KV after we've released
* the pointer it holds, since it won't be able to do the string
* compare. Search it before, if needed. */
if (expire != -1) {
exlink = kvstoreDictFindLink(db->expires, slot, kvobjGetKey(kvobj), NULL);
serverAssert(exlink != NULL);
}

if ((kvobj = activeDefragAlloc(kvobj))) {
/* Update its reference in the DB keys. */
unsigned int slot = calculateKeySlot(keystr);
dictEntry *de = kvstoreDictFind(db->keys, slot, keystr);
serverAssert(de);
kvstoreDictSetVal(db->keys, slot, de, ob);
link = kvstoreDictFindLink(db->keys, slot, keystr, NULL);
serverAssert(link != NULL);
kvstoreDictSetAtLink(db->keys, slot, kvobj, &link, 0);
if (expire != -1)
kvstoreDictSetAtLink(db->expires, slot, kvobj, &exlink, 0);
}
return ob;
return kvobj;
}

static doneStatus defragStageHExpires(void *ctx, monotime endtime) {
511
src/dict.c
511
src/dict.c
|
@ -8,8 +8,9 @@
|
|||
* Copyright (c) 2006-Present, Redis Ltd.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Licensed under your choice of the Redis Source Available License 2.0
|
||||
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
|
||||
* Licensed under your choice of (a) the Redis Source Available License 2.0
|
||||
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
|
||||
* GNU Affero General Public License v3 (AGPLv3).
|
||||
*/
|
||||
|
||||
#include "fmacros.h"
|
||||
|
@ -60,20 +61,30 @@ typedef struct {
|
|||
|
||||
/* -------------------------- private prototypes ---------------------------- */
|
||||
|
||||
static void _dictExpandIfNeeded(dict *d);
|
||||
static int _dictExpandIfNeeded(dict *d);
|
||||
static void _dictShrinkIfNeeded(dict *d);
|
||||
static void _dictRehashStepIfNeeded(dict *d, uint64_t visitedIdx);
|
||||
static signed char _dictNextExp(unsigned long size);
|
||||
static int _dictInit(dict *d, dictType *type);
|
||||
static dictEntry *dictGetNext(const dictEntry *de);
|
||||
static dictEntry **dictGetNextRef(dictEntry *de);
|
||||
static dictEntryLink dictGetNextLink(dictEntry *de);
|
||||
static void dictSetNext(dictEntry *de, dictEntry *next);
|
||||
static int dictDefaultCompare(dict *d, const void *key1, const void *key2);
|
||||
static int dictDefaultCompare(dictCmpCache *cache, const void *key1, const void *key2);
|
||||
static dictEntryLink dictFindLinkInternal(dict *d, const void *key, dictEntryLink *bucket);
|
||||
dictEntryLink dictFindLinkForInsert(dict *d, const void *key, dictEntry **existing);
|
||||
static dictEntry *dictInsertKeyAtLink(dict *d, void *key, dictEntryLink link);
|
||||
|
||||
/* -------------------------- unused --------------------------- */
|
||||
void dictSetSignedIntegerVal(dictEntry *de, int64_t val);
|
||||
int64_t dictGetSignedIntegerVal(const dictEntry *de);
|
||||
double dictIncrDoubleVal(dictEntry *de, double val);
|
||||
void *dictEntryMetadata(dictEntry *de);
|
||||
int64_t dictIncrSignedIntegerVal(dictEntry *de, int64_t val);
|
||||
|
||||
/* -------------------------- misc inline functions -------------------------------- */
|
||||
|
||||
typedef int (*keyCmpFunc)(dict *d, const void *key1, const void *key2);
|
||||
static inline keyCmpFunc dictGetKeyCmpFunc(dict *d) {
|
||||
typedef int (*keyCmpFunc)(dictCmpCache *cache, const void *key1, const void *key2);
|
||||
static inline keyCmpFunc dictGetCmpFunc(dict *d) {
|
||||
if (d->useStoredKeyApi && d->type->storedKeyCompare)
|
||||
return d->type->storedKeyCompare;
|
||||
if (d->type->keyCompare)
|
||||
|
@ -96,10 +107,6 @@ void dictSetHashFunctionSeed(uint8_t *seed) {
|
|||
memcpy(dict_hash_function_seed,seed,sizeof(dict_hash_function_seed));
|
||||
}
|
||||
|
||||
uint8_t *dictGetHashFunctionSeed(void) {
|
||||
return dict_hash_function_seed;
|
||||
}
|
||||
|
||||
/* The default hashing function uses SipHash implementation
|
||||
* in siphash.c. */
|
||||
|
||||
|
@ -262,12 +269,16 @@ int _dictResize(dict *d, unsigned long size, int* malloc_failed)
|
|||
d->ht_table[1] = new_ht_table;
|
||||
d->rehashidx = 0;
|
||||
if (d->type->rehashingStarted) d->type->rehashingStarted(d);
|
||||
if (d->type->bucketChanged)
|
||||
d->type->bucketChanged(d, DICTHT_SIZE(d->ht_size_exp[1]));
|
||||
|
||||
/* Is this the first initialization or is the first hash table empty? If so
|
||||
* it's not really a rehashing, we can just set the first hash table so that
|
||||
* it can accept keys. */
|
||||
if (d->ht_table[0] == NULL || d->ht_used[0] == 0) {
|
||||
if (d->type->rehashingCompleted) d->type->rehashingCompleted(d);
|
||||
if (d->type->bucketChanged)
|
||||
d->type->bucketChanged(d, -(long long)DICTHT_SIZE(d->ht_size_exp[0]));
|
||||
if (d->ht_table[0]) zfree(d->ht_table[0]);
|
||||
d->ht_size_exp[0] = new_ht_size_exp;
|
||||
d->ht_used[0] = new_ht_used;
|
||||
|
@ -370,6 +381,8 @@ static int dictCheckRehashingCompleted(dict *d) {
|
|||
if (d->ht_used[0] != 0) return 0;
|
||||
|
||||
if (d->type->rehashingCompleted) d->type->rehashingCompleted(d);
|
||||
if (d->type->bucketChanged)
|
||||
d->type->bucketChanged(d, -(long long)DICTHT_SIZE(d->ht_size_exp[0]));
|
||||
zfree(d->ht_table[0]);
|
||||
/* Copy the new ht onto the old one */
|
||||
d->ht_table[0] = d->ht_table[1];
|
||||
|
@ -507,54 +520,21 @@ int dictAdd(dict *d, void *key, void *val)
|
|||
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing)
|
||||
{
|
||||
/* Get the position for the new key or NULL if the key already exists. */
|
||||
void *position = dictFindPositionForInsert(d, key, existing);
|
||||
void *position = dictFindLinkForInsert(d, key, existing);
|
||||
if (!position) return NULL;
|
||||
|
||||
/* Dup the key if necessary. */
|
||||
if (d->type->keyDup) key = d->type->keyDup(d, key);
|
||||
|
||||
return dictInsertAtPosition(d, key, position);
|
||||
return dictInsertKeyAtLink(d, key, position);
|
||||
}
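
The dictAddRaw() contract is unchanged by this refactor: a NULL return still means the key already exists, with the entry reported through the optional `existing` out-parameter. A minimal caller-side sketch of that idiom (not part of this diff; the helper name is hypothetical, and it only builds inside the Redis source tree since dict.h is an internal header):

#include <stddef.h>
#include "dict.h"

/* Insert `key`, or overwrite the value if the key is already present. */
static void dictPutExample(dict *d, void *key, void *val) {
    dictEntry *existing = NULL;
    dictEntry *de = dictAddRaw(d, key, &existing);
    if (de == NULL) de = existing; /* dictAddRaw() found an existing key */
    dictSetVal(d, de, val);
}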

/* Low-level add function for non-existing keys:
* This function adds a new entry to the dictionary, assuming the key does not
* already exist.
* Parameters:
* - `dict *d`: Pointer to the dictionary structure.
* - `void *key`: Pointer to the key being added.
* - `const uint64_t hash`: hash of the key being added.
* Guarantees:
* - The key is assumed to be non-existing.
* Note:
* Ensure that the key's uniqueness is managed externally before calling this function. */
dictEntry *dictAddNonExistsByHash(dict *d, void *key, const uint64_t hash) {
/* Get the position for the new key, it should never be NULL. */
unsigned long idx, table;
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]);

/* Rehash the hash table if needed */
_dictRehashStepIfNeeded(d,idx);

/* Expand the hash table if needed */
_dictExpandIfNeeded(d);

table = dictIsRehashing(d) ? 1 : 0;
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
void *position = &d->ht_table[table][idx];
assert(position!=NULL);

/* Dup the key if necessary. */
if (d->type->keyDup) key = d->type->keyDup(d, key);

return dictInsertAtPosition(d, key, position);
}

/* Adds a key in the dict's hashtable at the position returned by a preceding
* call to dictFindPositionForInsert. This is a low level function which allows
/* Adds a key in the dict's hashtable at the link returned by a preceding
* call to dictFindLinkForInsert(). This is a low level function which allows
* splitting dictAddRaw in two parts. Normally, dictAddRaw or dictAdd should be
* used instead. */
dictEntry *dictInsertAtPosition(dict *d, void *key, void *position) {
dictEntry **bucket = position; /* It's a bucket, but the API hides that. */
* used instead. It assumes that dictExpandIfNeeded() was called before. */
dictEntry *dictInsertKeyAtLink(dict *d, void *key, dictEntryLink link) {
dictEntryLink bucket = link; /* It's a bucket, but the API hides that. */
dictEntry *entry;
/* If rehashing is ongoing, we insert in table 1, otherwise in table 0.
* Assert that the provided bucket is the right table. */

@@ -639,6 +619,7 @@ dictEntry *dictAddOrFind(dict *d, void *key) {
* dictDelete() and dictUnlink(), please check the top comment
* of those functions. */
static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
dictCmpCache cmpCache = {0};
uint64_t h, idx;
dictEntry *he, *prevHe;
int table;

@@ -652,7 +633,7 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
/* Rehash the hash table if needed */
_dictRehashStepIfNeeded(d,idx);

keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d);
keyCmpFunc cmpFunc = dictGetCmpFunc(d);

for (table = 0; table <= 1; table++) {
if (table == 0 && (long)idx < d->rehashidx) continue;

@@ -661,7 +642,7 @@ static dictEntry *dictGenericDelete(dict *d, const void *key, int nofree) {
prevHe = NULL;
while(he) {
void *he_key = dictGetKey(he);
if (key == he_key || cmpFunc(d, key, he_key)) {
if (key == he_key || cmpFunc(&cmpCache, key, he_key)) {
/* Unlink the element from the list */
if (prevHe)
dictSetNext(prevHe, dictGetNext(he));

@@ -758,6 +739,10 @@ void dictRelease(dict *d)
if (dictIsRehashing(d) && d->type->rehashingCompleted)
d->type->rehashingCompleted(d);

/* Subtract the size of all buckets. */
if (d->type->bucketChanged)
d->type->bucketChanged(d, -(long long)dictBuckets(d));

if (d->type->onDictRelease)
d->type->onDictRelease(d);

@@ -766,55 +751,153 @@ void dictRelease(dict *d)
zfree(d);
}

dictEntry *dictFindByHash(dict *d, const void *key, const uint64_t hash) {
dictEntry *he;
uint64_t idx, table;

if (dictSize(d) == 0) return NULL; /* dict is empty */
/* Finds a given key. Like dictFindLink(), yet searches the bucket even if the dict is empty.
*
* Returns a dictEntryLink reference if found. Otherwise, returns NULL.
*
* bucket - returns a pointer to the bucket that the key maps to, unless the dict is empty.
*/
static dictEntryLink dictFindLinkInternal(dict *d, const void *key, dictEntryLink *bucket) {
dictCmpCache cmpCache = {0};
dictEntryLink link;
uint64_t idx;
int table;

if (bucket) {
*bucket = NULL;
} else {
/* If dict is empty and there is no need to find the bucket, return NULL */
if (dictSize(d) == 0) return NULL;
}

const uint64_t hash = dictHashKey(d, key, d->useStoredKeyApi);
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[0]);
keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d);
keyCmpFunc cmpFunc = dictGetCmpFunc(d);

/* Rehash the hash table if needed */
_dictRehashStepIfNeeded(d,idx);

/* Check if we can use the compare function with length to avoid recomputing the key length every time */
keyCmpFuncWithLen cmpFuncWithLen = d->type->keyCompareWithLen;
keyLenFunc keyLenFunc = d->type->keyLen;
const int has_len_fn = (keyLenFunc != NULL && cmpFuncWithLen != NULL);
const size_t key_len = has_len_fn ? keyLenFunc(d,key) : 0;
for (table = 0; table <= 1; table++) {
int tables = (dictIsRehashing(d)) ? 2 : 1;
for (table = 0; table < tables; table++) {
if (table == 0 && (long)idx < d->rehashidx) continue;
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);

/* Prefetch the bucket at the calculated index */
redis_prefetch_read(&d->ht_table[table][idx]);

he = d->ht_table[table][idx];
while(he) {
void *he_key = dictGetKey(he);
link = &(d->ht_table[table][idx]);
if (bucket) *bucket = link;
while(link && *link) {
void *visitedKey = dictGetKey(*link);

/* Prefetch the next entry to improve cache efficiency */
redis_prefetch_read(dictGetNext(he));
if (key == he_key || (has_len_fn ?
cmpFuncWithLen(d, key, key_len, he_key, keyLenFunc(d,he_key)) :
cmpFunc(d, key, he_key)))
{
return he;
}
he = dictGetNext(he);
redis_prefetch_read(dictGetNext(*link));

if (key == visitedKey || cmpFunc(&cmpCache, key, visitedKey))
return link;

link = dictGetNextLink(*link);
}
/* Use unlikely to optimize branch prediction for the common case */
if (unlikely(!dictIsRehashing(d))) return NULL;
}
return NULL;
}

dictEntry *dictFind(dict *d, const void *key)
{
if (dictSize(d) == 0) return NULL; /* dict is empty */
const uint64_t hash = dictHashKey(d, key, d->useStoredKeyApi);
return dictFindByHash(d,key,hash);
dictEntryLink link = dictFindLink(d, key, NULL);
return (link) ? *link : NULL;
}

/* Find a key and return its dictEntryLink reference. Otherwise, return NULL.
*
* A dictEntryLink pointer is used to find the preceding dictEntry of the searched item.
* It is useful for deletion, addition, unlinking and updating, especially for
* a dict configured with 'no_value'. In such cases returning only a `dictEntry` from
* a lookup may be insufficient, since the entry might be optimized out to be the key
* object itself. By locating the preceding dictEntry (dictEntryLink) these ops can be
* properly handled.
*
* After calling link = dictFindLink(...), any necessary updates based on the returned
* link or bucket must be performed immediately after by calling dictSetKeyAtLink()
* without any intervening operations on the given dict. Otherwise, the `dictEntryLink`
* may become invalid. Example with kvobj of replacing a key with a new key:
*
* link = dictFindLink(d, key, &bucket, 0);
* ... Do something, but don't modify the dict ...
* // assert(link != NULL);
* dictSetKeyAtLink(d, kv, &link, 0);
*
* To add a new value (if there is no space for the new key, the dict will be expanded
* by dictSetKeyAtLink() and the bucket will be looked up again):
*
* link = dictFindLink(d, key, &bucket);
* ... Do something, but don't modify the dict ...
* // assert(link == NULL);
* dictSetKeyAtLink(d, kv, &bucket, 1);
*
* bucket - returns a link to the bucket that the key maps to, unless the dict is empty.
*/
dictEntryLink dictFindLink(dict *d, const void *key, dictEntryLink *bucket) {
if (bucket) *bucket = NULL;
if (unlikely(dictSize(d) == 0))
return NULL;

return dictFindLinkInternal(d, key, bucket);
}

/* Set the key with link
*
* link: - When `newItem` is set, `link` points to the bucket of the key.
* - When `newItem` is not set, `link` points to the link of the key.
* - If *link is NULL, dictFindLink() will be called to locate the key.
* - On return, it gets updated, as needed, to the inserted key.
*
* newItem: 1 = Add a key with a new dictEntry.
* 0 = Set a key to an existing dictEntry.
*/
void dictSetKeyAtLink(dict *d, void *key, dictEntryLink *link, int newItem) {
dictEntryLink dummy = NULL;
if (link == NULL) link = &dummy;
void *addedKey = (d->type->keyDup) ? d->type->keyDup(d, key) : key;

if (newItem) {
signed char snap[2] = {d->ht_size_exp[0], d->ht_size_exp[1] };

/* Make room if needed for the new key */
dictExpandIfNeeded(d);

/* Lookup key's link if the tables were reallocated or if the given link is set to NULL */
if (snap[0] != d->ht_size_exp[0] || snap[1] != d->ht_size_exp[1] || *link == NULL) {
dictEntryLink bucket;
/* Bypass dictFindLink() to search the bucket even if the dict is empty!!! */
dictUseStoredKeyApi(d, 1);
*link = dictFindLinkInternal(d, key, &bucket);
dictUseStoredKeyApi(d, 0);
assert(bucket != NULL);
assert(*link == NULL);
*link = bucket; /* On newItem the link should be the bucket */
}
dictInsertKeyAtLink(d, addedKey, *link);
return;
}

/* Setting key of existing dictEntry (newItem == 0) */

if (*link == NULL) {
*link = dictFindLink(d, key, NULL);
assert(*link != NULL);
}

dictEntry **de = *link;
/* Is it a regular dict entry of key and next? */
if (entryIsNoValue(*de)) {
decodeEntryNoValue(*de)->key = addedKey;
} else if (entryIsKey(*de)) {
/* `de` is optimized out to actually be a key. Replace the key but keep the lsb flags */
int mask = ((uintptr_t) *de) & ENTRY_PTR_MASK;
*de = encodeMaskedPtr(addedKey, mask);
} else {
(*de)->key = addedKey; /* `de` is a normal key-value dict entry */
}
}
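
Taken together, the two patterns described in the dictFindLink() comment look like this from the caller's side. A minimal sketch, not part of the diff: `kv` and `newKv` stand in for kvobj-style stored keys and are hypothetical, and the code assumes the Redis tree's dict.h:

#include <assert.h>
#include "dict.h"

static void linkUpdateSketch(dict *d, void *key, void *kv,
                             void *newKey, void *newKv) {
    /* Replace the key object of an existing entry (newItem == 0). */
    dictEntryLink link = dictFindLink(d, key, NULL);
    assert(link != NULL);               /* key must already exist */
    dictSetKeyAtLink(d, kv, &link, 0);

    /* Insert a brand-new key (newItem == 1): hand over the bucket from
     * the lookup; dictSetKeyAtLink() expands and re-probes if needed. */
    dictEntryLink bucket;
    link = dictFindLink(d, newKey, &bucket);
    assert(link == NULL);               /* key must not exist yet */
    dictSetKeyAtLink(d, newKv, &bucket, 1);
}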

void *dictFetchValue(dict *d, const void *key) {

@@ -824,30 +907,31 @@ void *dictFetchValue(dict *d, const void *key) {
return he ? dictGetVal(he) : NULL;
}

/* Find an element from the table, also get the plink of the entry. The entry
* is returned if the element is found, and the user should later call
* `dictTwoPhaseUnlinkFree` with it in order to unlink and release it. Otherwise if
* the key is not found, NULL is returned. These two functions should be used in pair.
/* Find an element from the table. A link is returned if the element is found, and
* the user should later call `dictTwoPhaseUnlinkFree` with it in order to unlink
* and release it. Otherwise if the key is not found, NULL is returned. These two
* functions should be used in pair.
* `dictTwoPhaseUnlinkFind` pauses rehash and `dictTwoPhaseUnlinkFree` resumes rehash.
*
* We can use it like this:
*
* dictEntry *de = dictTwoPhaseUnlinkFind(db->dict,key->ptr,&plink, &table);
* dictEntryLink link = dictTwoPhaseUnlinkFind(db->dict,key->ptr, &table);
* // Do something, but we can't modify the dict
* dictTwoPhaseUnlinkFree(db->dict,de,plink,table); // We don't need to lookup again
* dictTwoPhaseUnlinkFree(db->dict, link, table); // We don't need to lookup again
*
* If we want to find an entry before deleting it, this is an optimization that avoids
* dictFind followed by dictDelete, i.e. the first API is a find, and it gives some info
* to the second one to avoid repeating the lookup.
*/
dictEntry *dictTwoPhaseUnlinkFind(dict *d, const void *key, dictEntry ***plink, int *table_index) {
dictEntryLink dictTwoPhaseUnlinkFind(dict *d, const void *key, int *table_index) {
dictCmpCache cmpCache = {0};
uint64_t h, idx, table;

if (dictSize(d) == 0) return NULL; /* dict is empty */
if (dictIsRehashing(d)) _dictRehashStep(d);

h = dictHashKey(d, key, d->useStoredKeyApi);
keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d);
h = dictHashKey(d, key, d->useStoredKeyApi);
keyCmpFunc cmpFunc = dictGetCmpFunc(d);

for (table = 0; table <= 1; table++) {
idx = h & DICTHT_SIZE_MASK(d->ht_size_exp[table]);

@@ -855,26 +939,27 @@ dictEntry *dictTwoPhaseUnlinkFind(dict *d, const void *key, dictEntry ***plink,
dictEntry **ref = &d->ht_table[table][idx];
while (ref && *ref) {
void *de_key = dictGetKey(*ref);
if (key == de_key || cmpFunc(d, key, de_key)) {
if (key == de_key || cmpFunc(&cmpCache, key, de_key)) {
*table_index = table;
*plink = ref;
dictPauseRehashing(d);
return *ref;
return ref;
}
ref = dictGetNextRef(*ref);
ref = dictGetNextLink(*ref);
}
if (!dictIsRehashing(d)) return NULL;
}
return NULL;
}

void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table_index) {
if (he == NULL) return;
void dictTwoPhaseUnlinkFree(dict *d, dictEntryLink plink, int table_index) {
if (plink == NULL || *plink == NULL) return;
dictEntry *de = *plink;
d->ht_used[table_index]--;
*plink = dictGetNext(he);
dictFreeKey(d, he);
dictFreeVal(d, he);
if (!entryIsKey(he)) zfree(decodeMaskedPtr(he));

*plink = dictGetNext(de);
dictFreeKey(d, de);
dictFreeVal(d, de);
if (!entryIsKey(de)) zfree(decodeMaskedPtr(de));
_dictShrinkIfNeeded(d);
dictResumeRehashing(d);
}
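
A caller-side sketch of the new two-phase contract (assumed usage, not from the diff; inspectEntry() is a hypothetical callback and the code builds only inside the Redis tree):

#include "dict.h"

static void findInspectDelete(dict *d, const void *key,
                              void (*inspectEntry)(dictEntry *de)) {
    int table;
    dictEntryLink link = dictTwoPhaseUnlinkFind(d, key, &table);
    if (link == NULL) return;               /* key not found */
    /* Rehashing is paused here, so the link stays valid as long as
     * we don't modify the dict in between. */
    inspectEntry(*link);
    dictTwoPhaseUnlinkFree(d, link, table); /* unlink, free, resume rehash */
}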

@@ -967,7 +1052,7 @@ static dictEntry *dictGetNext(const dictEntry *de) {

/* Returns a pointer to the 'next' field in the entry or NULL if the entry
* doesn't have a next field. */
static dictEntry **dictGetNextRef(dictEntry *de) {
static dictEntryLink dictGetNextLink(dictEntry *de) {
if (entryIsKey(de)) return NULL;
if (entryIsNoValue(de)) return &decodeEntryNoValue(de)->next;
return &de->next;

@@ -990,8 +1075,8 @@ size_t dictMemUsage(const dict *d) {
dictBuckets(d) * sizeof(dictEntry*);
}

size_t dictEntryMemUsage(void) {
return sizeof(dictEntry);
size_t dictEntryMemUsage(int noValueDict) {
return (noValueDict) ? sizeof(dictEntryNoValue) : sizeof(dictEntry);
}
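
Call sites of dictEntryMemUsage() now state which entry layout they account for; a trivial sketch of the adjusted accounting (hypothetical helper, assuming entry memory is not also being counted via dictMemUsage()):

#include "dict.h"

/* Rough per-entry accounting: the caller knows whether the dict was
 * created with the no_value option. */
static size_t entriesBytes(dict *d, int noValue) {
    return dictSize(d) * dictEntryMemUsage(noValue);
}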

/* A fingerprint is a 64 bit number that represents the state of the dictionary

@@ -1273,7 +1358,6 @@ static void dictDefragBucket(dictEntry **bucketref, dictDefragFunctions *defragf
void *newval = defragval ? defragval(dictGetVal(de)) : NULL;
if (entryIsKey(de)) {
if (newkey) *bucketref = newkey;
assert(entryIsKey(*bucketref));
} else if (entryIsNoValue(de)) {
dictEntryNoValue *entry = decodeEntryNoValue(de), *newentry;
if ((newentry = defragalloc(entry))) {

@@ -1291,7 +1375,7 @@ static void dictDefragBucket(dictEntry **bucketref, dictDefragFunctions *defragf
if (newde) {
*bucketref = newde;
}
bucketref = dictGetNextRef(*bucketref);
bucketref = dictGetNextLink(*bucketref);
}
}

@@ -1423,6 +1507,32 @@ unsigned long dictScan(dict *d,
return dictScanDefrag(d, v, fn, NULL, privdata);
}

void dictScanDefragBucket(dictScanFunction *fn,
dictDefragFunctions *defragfns,
void *privdata,
dictEntry **bucketref) {
dictEntry **plink, *de, *next;

/* Emit entries at bucket */
if (defragfns) dictDefragBucket(bucketref, defragfns);

de = *bucketref;
plink = bucketref;
while (de) {
next = dictGetNext(de);
fn(privdata, de, plink);

if (!next) break; /* if last element, break */

/* if `*plink` is still pointing to 'de', then it means that the
* visited item wasn't deleted by fn() */
if (*plink == de)
plink = (entryIsNoValue(de)) ? &(decodeEntryNoValue(de)->next) : &(de->next);

de = next;
}
}
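
dictScanFunction callbacks now receive the link of the visited entry, which is what allows a callback to delete the entry it is visiting (dictScanDefragBucket() above re-reads *plink to detect exactly that). A sketch of a callback under the new signature, using the standard resumable-cursor loop (hypothetical helper; the diff's own expireScanCallback() in expire.c is a real adopter that ignores plink):

#include "dict.h"

/* Counts entries; ignores plink, like callbacks that never delete. */
static void countingScanCallback(void *privdata, const dictEntry *de,
                                 dictEntry **plink) {
    (void)de; (void)plink;
    size_t *counter = privdata;
    (*counter)++;
}

static size_t dictCountEntries(dict *d) {
    size_t count = 0;
    unsigned long cursor = 0;
    do {
        cursor = dictScan(d, cursor, countingScanCallback, &count);
    } while (cursor != 0);
    return count;
}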

/* Like dictScan, but additionally reallocates the memory used by the dict
* entries using the provided allocation function. This feature was added for
* the active defrag feature.

@@ -1438,7 +1548,6 @@ unsigned long dictScanDefrag(dict *d,
void *privdata)
{
int htidx0, htidx1;
const dictEntry *de, *next;
unsigned long m0, m1;

if (dictSize(d) == 0) return 0;

@@ -1449,17 +1558,7 @@ unsigned long dictScanDefrag(dict *d,
if (!dictIsRehashing(d)) {
htidx0 = 0;
m0 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx0]);

/* Emit entries at cursor */
if (defragfns) {
dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns);
}
de = d->ht_table[htidx0][v & m0];
while (de) {
next = dictGetNext(de);
fn(privdata, de);
de = next;
}
dictScanDefragBucket(fn, defragfns, privdata, &d->ht_table[htidx0][v & m0]);

/* Set unmasked bits so incrementing the reversed cursor
* operates on the masked bits */

@@ -1483,30 +1582,12 @@ unsigned long dictScanDefrag(dict *d,
m0 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx0]);
m1 = DICTHT_SIZE_MASK(d->ht_size_exp[htidx1]);

/* Emit entries at cursor */
if (defragfns) {
dictDefragBucket(&d->ht_table[htidx0][v & m0], defragfns);
}
de = d->ht_table[htidx0][v & m0];
while (de) {
next = dictGetNext(de);
fn(privdata, de);
de = next;
}
dictScanDefragBucket(fn, defragfns, privdata, &d->ht_table[htidx0][v & m0]);

/* Iterate over indices in larger table that are the expansion
* of the index pointed to by the cursor in the smaller table */
do {
/* Emit entries at cursor */
if (defragfns) {
dictDefragBucket(&d->ht_table[htidx1][v & m1], defragfns);
}
de = d->ht_table[htidx1][v & m1];
while (de) {
next = dictGetNext(de);
fn(privdata, de);
de = next;
}
dictScanDefragBucket(fn, defragfns, privdata, &d->ht_table[htidx1][v & m1]);

/* Increment the reverse cursor not covered by the smaller mask.*/
v |= ~m1;

@@ -1564,12 +1645,12 @@ int dictExpandIfNeeded(dict *d) {
return DICT_ERR;
}

/* Expand the hash table if needed */
static void _dictExpandIfNeeded(dict *d) {
/* Expand the hash table if needed (OK=Expanded, ERR=Not expanded) */
static int _dictExpandIfNeeded(dict *d) {
/* Automatic resizing is disallowed. Return */
if (d->pauseAutoResize > 0) return;

dictExpandIfNeeded(d);
if (d->pauseAutoResize > 0) return DICT_ERR;

return dictExpandIfNeeded(d);
}

/* Returning DICT_OK indicates a successful shrinking or the dictionary is undergoing rehashing,

@@ -1629,12 +1710,13 @@ static signed char _dictNextExp(unsigned long size)
return 8*sizeof(long) - __builtin_clzl(size-1);
}

/* Finds and returns the position within the dict where the provided key should
* be inserted using dictInsertAtPosition if the key does not already exist in
/* Finds and returns the link within the dict where the provided key should
* be inserted using dictInsertKeyAtLink() if the key does not already exist in
* the dict. If the key exists in the dict, NULL is returned and the optional
* 'existing' entry pointer is populated, if provided. */
void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing) {
dictEntryLink dictFindLinkForInsert(dict *d, const void *key, dictEntry **existing) {
unsigned long idx, table;
dictCmpCache cmpCache = {0};
dictEntry *he;
uint64_t hash = dictHashKey(d, key, d->useStoredKeyApi);
if (existing) *existing = NULL;

@@ -1645,7 +1727,7 @@ void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing)

/* Expand the hash table if needed */
_dictExpandIfNeeded(d);
keyCmpFunc cmpFunc = dictGetKeyCmpFunc(d);
keyCmpFunc cmpFunc = dictGetCmpFunc(d);

for (table = 0; table <= 1; table++) {
if (table == 0 && (long)idx < d->rehashidx) continue;

@@ -1654,7 +1736,7 @@ void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing)
he = d->ht_table[table][idx];
while(he) {
void *he_key = dictGetKey(he);
if (key == he_key || cmpFunc(d, key, he_key)) {
if (key == he_key || cmpFunc(&cmpCache, key, he_key)) {
if (existing) *existing = he;
return NULL;
}

@@ -1675,6 +1757,11 @@ void dictEmpty(dict *d, void(callback)(dict*)) {
* destroying the dict fake completion. */
if (dictIsRehashing(d) && d->type->rehashingCompleted)
d->type->rehashingCompleted(d);

/* Subtract the size of all buckets. */
if (d->type->bucketChanged)
d->type->bucketChanged(d, -(long long)dictBuckets(d));

_dictClear(d,0,callback);
_dictClear(d,1,callback);
d->rehashidx = -1;

@@ -1690,30 +1777,6 @@ uint64_t dictGetHash(dict *d, const void *key) {
return dictHashKey(d, key, d->useStoredKeyApi);
}

/* Finds the dictEntry using pointer and pre-calculated hash.
* oldkey is a dead pointer and should not be accessed.
* the hash value should be provided using dictGetHash.
* no string / key comparison is performed.
* return value is a pointer to the dictEntry if found, or NULL if not found. */
dictEntry *dictFindByHashAndPtr(dict *d, const void *oldptr, const uint64_t hash) {
dictEntry *he;
unsigned long idx, table;

if (dictSize(d) == 0) return NULL; /* dict is empty */
for (table = 0; table <= 1; table++) {
idx = hash & DICTHT_SIZE_MASK(d->ht_size_exp[table]);
if (table == 0 && (long)idx < d->rehashidx) continue;
he = d->ht_table[table][idx];
while(he) {
if (oldptr == dictGetKey(he))
return he;
he = dictGetNext(he);
}
if (!dictIsRehashing(d)) return NULL;
}
return NULL;
}

/* Provides the old and new ht size for a given dictionary during rehashing. This method
* should only be invoked during initialization/rehashing. */
void dictRehashingInfo(dict *d, unsigned long long *from_size, unsigned long long *to_size) {

@@ -1832,8 +1895,8 @@ void dictGetStats(char *buf, size_t bufsize, dict *d, int full) {
orig_buf[orig_bufsize-1] = '\0';
}

static int dictDefaultCompare(dict *d, const void *key1, const void *key2) {
(void)(d); /*unused*/
static int dictDefaultCompare(dictCmpCache *cache, const void *key1, const void *key2) {
(void)(cache); /*unused*/
return key1 == key2;
}

@@ -1849,9 +1912,9 @@ uint64_t hashCallback(const void *key) {
return dictGenHashFunction((unsigned char*)key, strlen((char*)key));
}

int compareCallback(dict *d, const void *key1, const void *key2) {
int compareCallback(dictCmpCache *cache, const void *key1, const void *key2) {
int l1,l2;
UNUSED(d);
UNUSED(cache);

l1 = strlen((char*)key1);
l2 = strlen((char*)key2);

@@ -2110,57 +2173,6 @@ int dictTest(int argc, char **argv, int flags) {
end_benchmark("Inserting via dictAddRaw() existing (no insertion)");
assert((long)dictSize(d) == count);

dictEmpty(d, NULL);

start_benchmark();
for (j = 0; j < count; j++) {
void *key = stringFromLongLong(j);
const uint64_t hash = dictGetHash(d, key);
de = dictAddNonExistsByHash(d,key,hash);
assert(de != NULL);
}
end_benchmark("Inserting via dictAddNonExistsByHash() non existing");
assert((long)dictSize(d) == count);

/* Wait for rehashing. */
while (dictIsRehashing(d)) {
dictRehashMicroseconds(d,100*1000);
}

dictEmpty(d, NULL);

start_benchmark();
for (j = 0; j < count; j++) {
/* Create a key */
void *key = stringFromLongLong(j);

/* Check if the key exists */
dictEntry *entry = dictFind(d, key);
assert(entry == NULL);

/* Add the key */
dictEntry *de = dictAddRaw(d, key, NULL);
assert(de != NULL);
}
end_benchmark("Find() and inserting via dictFind()+dictAddRaw() non existing");

dictEmpty(d, NULL);

start_benchmark();
for (j = 0; j < count; j++) {
/* Create a key */
void *key = stringFromLongLong(j);
uint64_t hash = dictGetHash(d, key);

/* Check if the key exists */
dictEntry *entry = dictFindByHash(d, key, hash);
assert(entry == NULL);
de = dictAddNonExistsByHash(d, key, hash);
assert(de != NULL);
}
end_benchmark("Find() and inserting via dictGetHash()+dictFindByHash()+dictAddNonExistsByHash() non existing");
assert((long)dictSize(d) == count);

/* Wait for rehashing. */
while (dictIsRehashing(d)) {
dictRehashMicroseconds(d,100*1000);

@@ -2259,6 +2271,53 @@ int dictTest(int argc, char **argv, int flags) {
zfree(lookupKeys);
}

TEST("Test dictFindLink() functionality") {
dictType dt = BenchmarkDictType;
dict *d = dictCreate(&dt);

/* find in empty dict */
dictEntryLink link = dictFindLink(d, "key", NULL);
assert(link == NULL);

/* Add keys to dict and test */
for (j = 0; j < 10; j++) {
/* Add another key to dict */
char *key = stringFromLongLong(j);
retval = dictAdd(d, key, (void*)j);
assert(retval == DICT_OK);
/* find existing keys with dictFindLink() */
dictEntryLink link = dictFindLink(d, key, NULL);
assert(link != NULL);
assert(*link != NULL);
assert(dictGetKey(*link) != NULL);

/* Test that the key found is the correct one */
void *foundKey = dictGetKey(*link);
assert(compareCallback(NULL, foundKey, key));

/* Test finding a non-existing key */
char *nonExistingKey = stringFromLongLong(j + 10);
link = dictFindLink(d, nonExistingKey, NULL);
assert(link == NULL);

/* Test with bucket parameter */
dictEntryLink bucket = NULL;
link = dictFindLink(d, key, &bucket);
assert(link != NULL);
assert(bucket != NULL);

/* Test bucket parameter with non-existing key */
link = dictFindLink(d, nonExistingKey, &bucket);
assert(link == NULL);
assert(bucket != NULL); /* Bucket should still be set even for non-existing keys */

/* Clean up */
zfree(nonExistingKey);
}

dictRelease(d);
}

return 0;
}
#endif

84
src/dict.h

@@ -8,8 +8,9 @@
* Copyright (c) 2006-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#ifndef __DICT_H

@@ -28,15 +29,33 @@

typedef struct dictEntry dictEntry; /* opaque */
typedef struct dict dict;
typedef size_t (*keyLenFunc)(dict *d, const void *key1);
typedef int (*keyCmpFuncWithLen)(dict *d, const void *key1, const size_t key1_len, const void *key2, const size_t key2_len);
typedef dictEntry **dictEntryLink; /* See description of dictFindLink() */

/* Searching for a key in a dict may involve a few comparisons.
* If extracting the looked-up key is expensive (e.g., sdslen(), kvobjGetKey()),
* caching can be used to reduce those repetitive computations.
*
* This struct is passed to the comparison function as temporary caching, if
* needed by the function across the comparisons of a given lookup. It is scoped
* to the looked-up key and reset before each new lookup. */
typedef struct dictCmpCache {
int useCache;

union {
uint64_t u64;
int64_t i64;
int i;
size_t sz;
void *p;
} data[2];
} dictCmpCache;
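
A sketch of a keyCompare callback that uses the cache: the probe key's length is the same across every comparison of one lookup, so it is computed once and memoized in data[0] (illustrative only; assumes sds keys and that a dictType wires this in via its keyCompare field):

#include <string.h>
#include "dict.h"
#include "sds.h"

static int sdsCachedCompare(dictCmpCache *cache, const void *key1,
                            const void *key2) {
    if (!cache->useCache) {            /* first comparison of this lookup */
        cache->data[0].sz = sdslen((sds)key1);
        cache->useCache = 1;
    }
    size_t l1 = cache->data[0].sz;
    size_t l2 = sdslen((sds)key2);
    return l1 == l2 && memcmp(key1, key2, l1) == 0; /* 1 when equal */
}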

typedef struct dictType {
/* Callbacks */
uint64_t (*hashFunction)(const void *key);
void *(*keyDup)(dict *d, const void *key);
void *(*valDup)(dict *d, const void *obj);
int (*keyCompare)(dict *d, const void *key1, const void *key2);
int (*keyCompare)(dictCmpCache *cache, const void *key1, const void *key2);
void (*keyDestructor)(dict *d, void *key);
void (*valDestructor)(dict *d, void *obj);
int (*resizeAllowed)(size_t moreMem, double usedRatio);

@@ -45,6 +64,9 @@ typedef struct dictType {
/* Invoked at the end of dict initialization/rehashing of all the entries from old to new ht. Both ht still exist
* and are cleaned up after this callback. */
void (*rehashingCompleted)(dict *d);
/* Invoked when the size of the dictionary changes.
* The `delta` parameter can be positive (size increase) or negative (size decrease). */
void (*bucketChanged)(dict *d, long long delta);
/* Allow a dict to carry extra caller-defined metadata. The
* extra memory is initialized to 0 when a dict is allocated. */
size_t (*dictMetadataBytes)(dict *d);

@@ -62,8 +84,6 @@ typedef struct dictType {
/* This flag is required for `no_value` optimization since the optimization
* reuses LSB bits as metadata */
unsigned int keys_are_odd:1;
/* TODO: Add a 'keys_are_even' flag and use a similar optimization if that
* flag is set. */

/* Ensures that the entire hash table is rehashed at once if set. */
unsigned int force_full_rehash:1;

@@ -90,14 +110,10 @@ typedef struct dictType {
*
* Set both functions to NULL if you don't want to support this feature. */
uint64_t (*storedHashFunction)(const void *key);
int (*storedKeyCompare)(dict *d, const void *key1, const void *key2);
int (*storedKeyCompare)(dictCmpCache *cache, const void *key1, const void *key2);

/* Optional callback called when the dict is destroyed. */
void (*onDictRelease)(dict *d);

/* Optional keyLen to avoid duplicate computation of key lengths. */
keyLenFunc keyLen;
keyCmpFuncWithLen keyCompareWithLen;
} dictType;

#define DICTHT_SIZE(exp) ((exp) == -1 ? 0 : (unsigned long)1<<(exp))

@@ -143,7 +159,7 @@ typedef struct dictStats {
unsigned long *clvector;
} dictStats;

typedef void (dictScanFunction)(void *privdata, const dictEntry *de);
typedef void (dictScanFunction)(void *privdata, const dictEntry *de, dictEntry **plink);
typedef void *(dictDefragAllocFunction)(void *ptr);
typedef struct {
dictDefragAllocFunction *defragAlloc; /* Used for entries etc. */

@@ -206,40 +222,20 @@ int dictTryExpand(dict *d, unsigned long size);
int dictShrink(dict *d, unsigned long size);
int dictAdd(dict *d, void *key, void *val);
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing);
dictEntry *dictAddNonExistsByHash(dict *d, void *key, const uint64_t hash);
void *dictFindPositionForInsert(dict *d, const void *key, dictEntry **existing);
dictEntry *dictInsertAtPosition(dict *d, void *key, void *position);
dictEntry *dictAddOrFind(dict *d, void *key);
int dictReplace(dict *d, void *key, void *val);
int dictDelete(dict *d, const void *key);
dictEntry *dictUnlink(dict *d, const void *key);
void dictFreeUnlinkedEntry(dict *d, dictEntry *he);
dictEntry *dictTwoPhaseUnlinkFind(dict *d, const void *key, dictEntry ***plink, int *table_index);
void dictTwoPhaseUnlinkFree(dict *d, dictEntry *he, dictEntry **plink, int table_index);
dictEntryLink dictTwoPhaseUnlinkFind(dict *d, const void *key, int *table_index);
void dictTwoPhaseUnlinkFree(dict *d, dictEntryLink llink, int table_index);
void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key);
dictEntry *dictFindByHash(dict *d, const void *key, const uint64_t hash);
dictEntry *dictFindByHashAndPtr(dict *d, const void *oldptr, const uint64_t hash);
void *dictFetchValue(dict *d, const void *key);
int dictShrinkIfNeeded(dict *d);
int dictExpandIfNeeded(dict *d);
void dictSetKey(dict *d, dictEntry* de, void *key);
void dictSetVal(dict *d, dictEntry *de, void *val);
void dictSetSignedIntegerVal(dictEntry *de, int64_t val);
void dictSetUnsignedIntegerVal(dictEntry *de, uint64_t val);
void dictSetDoubleVal(dictEntry *de, double val);
int64_t dictIncrSignedIntegerVal(dictEntry *de, int64_t val);
uint64_t dictIncrUnsignedIntegerVal(dictEntry *de, uint64_t val);
double dictIncrDoubleVal(dictEntry *de, double val);
void *dictEntryMetadata(dictEntry *de);
void *dictGetKey(const dictEntry *de);
void *dictGetVal(const dictEntry *de);
int64_t dictGetSignedIntegerVal(const dictEntry *de);
uint64_t dictGetUnsignedIntegerVal(const dictEntry *de);
double dictGetDoubleVal(const dictEntry *de);
double *dictGetDoubleValPtr(dictEntry *de);
size_t dictMemUsage(const dict *d);
size_t dictEntryMemUsage(void);
size_t dictEntryMemUsage(int noValueDict);
dictIterator *dictGetIterator(dict *d);
dictIterator *dictGetSafeIterator(dict *d);
void dictInitIterator(dictIterator *iter, dict *d);

@@ -258,7 +254,6 @@ void dictSetResizeEnabled(dictResizeEnable enable);
int dictRehash(dict *d, int n);
int dictRehashMicroseconds(dict *d, uint64_t us);
void dictSetHashFunctionSeed(uint8_t *seed);
uint8_t *dictGetHashFunctionSeed(void);
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata);
unsigned long dictScanDefrag(dict *d, unsigned long v, dictScanFunction *fn, dictDefragFunctions *defragfns, void *privdata);
uint64_t dictGetHash(dict *d, const void *key);

@@ -269,6 +264,21 @@ dictStats* dictGetStatsHt(dict *d, int htidx, int full);
void dictCombineStats(dictStats *from, dictStats *into);
void dictFreeStats(dictStats *stats);

dictEntryLink dictFindLink(dict *d, const void *key, dictEntryLink *bucket);
void dictSetKeyAtLink(dict *d, void *key, dictEntryLink *link, int newItem);

/* API relevant only when dict is used as a hash-map (no_value=0) */
void dictSetKey(dict *d, dictEntry* de, void *key);
void dictSetVal(dict *d, dictEntry *de, void *val);
void *dictGetVal(const dictEntry *de);
void dictSetDoubleVal(dictEntry *de, double val);
double dictGetDoubleVal(const dictEntry *de);
double *dictGetDoubleValPtr(dictEntry *de);
void *dictFetchValue(dict *d, const void *key);
void dictSetUnsignedIntegerVal(dictEntry *de, uint64_t val);
uint64_t dictIncrUnsignedIntegerVal(dictEntry *de, uint64_t val);
uint64_t dictGetUnsignedIntegerVal(const dictEntry *de);

#define dictForEach(d, ty, m, ...) do { \
dictIterator *di = dictGetIterator(d); \
dictEntry *de; \

@@ -1,8 +1,9 @@
/*
* Copyright Redis Ltd. 2024 - present
*
* Licensed under your choice of the Redis Source Available License 2.0 (RSALv2)
* or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#include <stdio.h>

@@ -1,8 +1,9 @@
/*
* Copyright Redis Ltd. 2024 - present
*
* Licensed under your choice of the Redis Source Available License 2.0 (RSALv2)
* or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*
*
* WHAT IS EBUCKETS?

@@ -16,8 +16,9 @@
* Copyright (c) 2011-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

@@ -5,8 +5,9 @@
* Copyright (c) 2011-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#ifndef __ENDIANCONV_H

@@ -2,8 +2,9 @@
* Copyright (c) 2011-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#include "server.h"

@@ -3,8 +3,9 @@
* Copyright (c) 2024-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#include "eventnotifier.h"

@@ -3,8 +3,9 @@
* Copyright (c) 2024-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#ifndef EVENTNOTIFIER_H

37
src/evict.c

@@ -5,8 +5,9 @@
* Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#include "server.h"

@@ -130,27 +131,16 @@ int evictionPoolPopulate(redisDb *db, kvstore *samplekvs, struct evictionPoolEnt
count = kvstoreDictGetSomeKeys(samplekvs,slot,samples,server.maxmemory_samples);
for (j = 0; j < count; j++) {
unsigned long long idle;
sds key;
robj *o;
dictEntry *de;

de = samples[j];
key = dictGetKey(de);

/* If the dictionary we are sampling from is not the main
* dictionary (but the expires one) we need to lookup the key
* again in the key dictionary to obtain the value object. */
if (server.maxmemory_policy != MAXMEMORY_VOLATILE_TTL) {
if (samplekvs != db->keys)
de = kvstoreDictFind(db->keys, slot, key);
o = dictGetVal(de);
}

dictEntry *de = samples[j];
kvobj *kv = dictGetKV(de);
sds key = kvobjGetKey(kv);

/* Calculate the idle time according to the policy. This is called
* idle just because the code initially handled LRU, but is in fact
* just a score where a higher score means better candidate. */
if (server.maxmemory_policy & MAXMEMORY_FLAG_LRU) {
idle = estimateObjectIdleTime(o);
idle = estimateObjectIdleTime(kv);
} else if (server.maxmemory_policy & MAXMEMORY_FLAG_LFU) {
/* When we use an LRU policy, we sort the keys by idle time
* so that we expire keys starting from greater idle time.

@@ -159,10 +149,10 @@ int evictionPoolPopulate(redisDb *db, kvstore *samplekvs, struct evictionPoolEnt
* first. So inside the pool we put objects using the inverted
* frequency subtracting the actual frequency to the maximum
* frequency of 255. */
idle = 255-LFUDecrAndReturn(o);
idle = 255-LFUDecrAndReturn(kv);
} else if (server.maxmemory_policy == MAXMEMORY_VOLATILE_TTL) {
/* In this case the sooner the expire the better. */
idle = ULLONG_MAX - dictGetSignedIntegerVal(de);
idle = ULLONG_MAX - kvobjGetExpire(kv);
} else {
serverPanic("Unknown eviction policy in evictionPoolPopulate()");
}
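
The model this hunk converges on — one kvobj per entry carrying key, value and expire — can be read off the accessors alone. A sketch of scoring one sampled entry under the new layout (rankOf is a hypothetical scoring callback, not a Redis API; builds only inside the Redis tree):

#include "server.h"

static unsigned long long sampleScore(dictEntry *de,
        unsigned long long (*rankOf)(kvobj *kv, long long expire)) {
    kvobj *kv = dictGetKV(de);             /* the entry holds the kvobj directly */
    sds key = kvobjGetKey(kv);             /* the key is embedded in the kvobj */
    long long expire = kvobjGetExpire(kv); /* -1 when no TTL is set */
    (void)key;
    return rankOf(kv, expire);
}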

@@ -621,7 +611,7 @@ int performEvictions(void) {
/* If the key exists, is our pick. Otherwise it is
* a ghost and we need to try the next element. */
if (de) {
bestkey = dictGetKey(de);
bestkey = kvobjGetKey(dictGetKV(de));
break;
} else {
/* Ghost... Iterate again. */

@@ -649,7 +639,8 @@ int performEvictions(void) {
int slot = kvstoreGetFairRandomDictIndex(kvs);
de = kvstoreDictGetRandomKey(kvs, slot);
if (de) {
bestkey = dictGetKey(de);
kvobj *kv = dictGetKV(de);
bestkey = kvobjGetKey(kv);
bestdbid = j;
break;
}

49
src/expire.c

@@ -5,8 +5,9 @@
* Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#include "server.h"

@@ -24,8 +25,8 @@
static double avg_ttl_factor[16] = {0.98, 0.9604, 0.941192, 0.922368, 0.903921, 0.885842, 0.868126, 0.850763, 0.833748, 0.817073, 0.800731, 0.784717, 0.769022, 0.753642, 0.738569, 0.723798};

/* Helper function for the activeExpireCycle() function.
* This function will try to expire the key that is stored in the hash table
* entry 'de' of the 'expires' hash table of a Redis database.
* This function will try to expire the key-value entry that is stored in the
* hash table entry 'de' of the 'expires' hash table of a Redis database.
*
* If the key is found to be expired, it is removed from the database and
* 1 is returned. Otherwise no operation is performed and 0 is returned.

@@ -34,13 +35,12 @@
*
* The parameter 'now' is the current time in milliseconds as is passed
* to the function to avoid too many gettimeofday() syscalls. */
int activeExpireCycleTryExpire(redisDb *db, dictEntry *de, long long now) {
long long t = dictGetSignedIntegerVal(de);
if (now < t)
int activeExpireCycleTryExpire(redisDb *db, kvobj *kv, long long now) {
if (now < kvobjGetExpire(kv))
return 0;

enterExecutionUnit(1, 0);
sds key = dictGetKey(de);
sds key = kvobjGetKey(kv);
robj *keyobj = createStringObject(key,sdslen(key));
deleteExpiredKeyAndPropagate(db,keyobj);
decrRefCount(keyobj);

@@ -108,11 +108,12 @@
int ttl_samples; /* num keys with ttl not yet expired */
} expireScanData;

void expireScanCallback(void *privdata, const dictEntry *const_de) {
dictEntry *de = (dictEntry *)const_de;
void expireScanCallback(void *privdata, const dictEntry *de, dictEntryLink plink) {
UNUSED(plink);
kvobj *kv = dictGetKV(de);
expireScanData *data = privdata;
long long ttl = dictGetSignedIntegerVal(de) - data->now;
if (activeExpireCycleTryExpire(data->db, de, data->now)) {
long long ttl = kvobjGetExpire(kv) - data->now;
if (activeExpireCycleTryExpire(data->db, kv, data->now)) {
data->expired++;
}
if (ttl > 0) {

@@ -463,14 +464,14 @@ void expireSlaveKeys(void) {
while(dbids && dbid < server.dbnum) {
if ((dbids & 1) != 0) {
redisDb *db = server.db+dbid;
dictEntry *expire = dbFindExpires(db, keyname);
int expired = expire && activeExpireCycleTryExpire(server.db+dbid,expire,start);
kvobj *kv = dbFindExpires(db, keyname);
int expired = kv && activeExpireCycleTryExpire(server.db+dbid, kv, start);

/* If the key was not expired in this DB, we need to set the
* corresponding bit in the new bitmap we set as value.
* At the end of the loop if the bitmap is zero, it means we
* no longer need to keep track of this key. */
if (expire && !expired) {
if (kv && !expired) {
noexpire++;
new_dbids |= (uint64_t)1 << dbid;
}

@@ -498,7 +499,7 @@ void expireSlaveKeys(void) {

/* Track keys that received an EXPIRE or similar command in the context
* of a writable slave. */
void rememberSlaveKeyWithExpire(redisDb *db, robj *key) {
void rememberSlaveKeyWithExpire(redisDb *db, sds key) {
if (slaveKeysWithExpire == NULL) {
static dictType dt = {
dictSdsHash, /* hash function */

@@ -513,13 +514,13 @@ void rememberSlaveKeyWithExpire(redisDb *db, robj *key) {
}
if (db->id > 63) return;

dictEntry *de = dictAddOrFind(slaveKeysWithExpire,key->ptr);
dictEntry *de = dictAddOrFind(slaveKeysWithExpire, key);
/* If the entry was just created, set it to a copy of the SDS string
* representing the key: we don't want to need to take those keys
* in sync with the main DB. The keys will be removed by expireSlaveKeys()
* as it scans to find keys to remove. */
if (dictGetKey(de) == key->ptr) {
dictSetKey(slaveKeysWithExpire, de, sdsdup(key->ptr));
if (dictGetKey(de) == key) {
dictSetKey(slaveKeysWithExpire, de, sdsdup(key));
dictSetUnsignedIntegerVal(de,0);
}
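
rememberSlaveKeyWithExpire() above leans on the dictAddOrFind() get-or-create idiom: if the returned entry's key is still the caller's pointer, the entry was just created and must be given an owned copy of the key. A stripped-down sketch of that idiom (hypothetical counters dict; builds only inside the Redis tree):

#include "dict.h"
#include "sds.h"

/* Count occurrences of sds keys: create on first sight, bump otherwise. */
static void countKey(dict *counters, sds key) {
    dictEntry *de = dictAddOrFind(counters, key);
    if (dictGetKey(de) == key) {
        /* Just created: own a private copy of the key, start at 1. */
        dictSetKey(counters, de, sdsdup(key));
        dictSetUnsignedIntegerVal(de, 1);
    } else {
        dictIncrUnsignedIntegerVal(de, 1);
    }
}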

@@ -653,13 +654,14 @@ void expireGenericCommand(client *c, long long basetime, int unit) {
when += basetime;

/* No key, return zero. */
if (lookupKeyWrite(c->db,key) == NULL) {
kvobj *kv = lookupKeyWrite(c->db,key);
if (kv == NULL) {
addReply(c,shared.czero);
return;
}

if (flag) {
current_expire = getExpire(c->db, key);
current_expire = kvobjGetExpire(kv);

/* NX option is set, check current expiry */
if (flag & EXPIRE_NX) {

@@ -764,14 +766,15 @@ void ttlGenericCommand(client *c, int output_ms, int output_abs) {
long long expire, ttl = -1;

/* If the key does not exist at all, return -2 */
if (lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH) == NULL) {
kvobj *kv = lookupKeyReadWithFlags(c->db,c->argv[1],LOOKUP_NOTOUCH);
if (kv == NULL) {
addReplyLongLong(c,-2);
return;
}

/* The key exists. Return -1 if it has no expire, or the actual
* TTL value otherwise. */
expire = getExpire(c->db,c->argv[1]);
expire = kvobjGetExpire(kv);
if (expire != -1) {
ttl = output_abs ? expire : expire-commandTimeSnapshot();
if (ttl < 0) ttl = 0;

@@ -2,8 +2,9 @@
* Copyright (c) 2009-Present, Redis Ltd.
* All rights reserved.
*
* Licensed under your choice of the Redis Source Available License 2.0
* (RSALv2) or the Server Side Public License v1 (SSPLv1).
* Licensed under your choice of (a) the Redis Source Available License 2.0
* (RSALv2); or (b) the Server Side Public License v1 (SSPLv1); or (c) the
* GNU Affero General Public License v3 (AGPLv3).
*/

#ifndef _REDIS_FMACRO_H