mirror of https://github.com/python/cpython.git
SF bug #942952: Weakness in tuple hash
(Basic approach and test concept by Tim Peters.)
* Improved the hash to reduce collisions.
* Added the torture test to the test suite.
This commit is contained in:
parent
504239fb38
commit
41bd02256f
|
@ -41,6 +41,25 @@ def f():
|
||||||
yield i
|
yield i
|
||||||
self.assertEqual(list(tuple(f())), range(1000))
|
self.assertEqual(list(tuple(f())), range(1000))
|
||||||
|
|
||||||
|
def test_hash(self):
|
||||||
|
# See SF bug 942952: Weakness in tuple hash
|
||||||
|
# The hash should:
|
||||||
|
# be non-commutative
|
||||||
|
# should spread-out closely spaced values
|
||||||
|
# should not exhibit cancellation in tuples like (x,(x,y))
|
||||||
|
# should be distinct from element hashes: hash(x)!=hash((x,))
|
||||||
|
# This test exercises those cases.
|
||||||
|
# For a pure random hash and N=50, the expected number of collisions
|
||||||
|
# is 7.3. Here we allow twice that number.
|
||||||
|
# Any worse and the hash function is sorely suspect.
|
||||||
|
|
||||||
|
N=50
|
||||||
|
base = range(N)
|
||||||
|
xp = [(i, j) for i in base for j in base]
|
||||||
|
inps = base + [(i, j) for i in base for j in xp] + \
|
||||||
|
[(i, j) for i in xp for j in base] + xp + zip(base)
|
||||||
|
collisions = len(inps) - len(set(map(hash, inps)))
|
||||||
|
self.assert_(collisions <= 15)
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
test_support.run_unittest(TupleTest)
|
test_support.run_unittest(TupleTest)
|
||||||
|
|
|
@ -12,6 +12,9 @@ What's New in Python 2.4 alpha 1?
|
||||||
Core and builtins
|
Core and builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Improved the tuple hashing algorithm to give fewer collisions in
|
||||||
|
common cases. Fixes bug #942952.
|
||||||
|
|
||||||
- Implemented generator expressions (PEP 289). Coded by Jiwon Seo.
|
- Implemented generator expressions (PEP 289). Coded by Jiwon Seo.
|
||||||
|
|
||||||
- Enabled the profiling of C extension functions (and builtins) - check
|
- Enabled the profiling of C extension functions (and builtins) - check
|
||||||
|
|
|
@ -262,15 +262,16 @@ tuplehash(PyTupleObject *v)
|
||||||
register long x, y;
|
register long x, y;
|
||||||
register int len = v->ob_size;
|
register int len = v->ob_size;
|
||||||
register PyObject **p;
|
register PyObject **p;
|
||||||
|
long mult = 1000003L;
|
||||||
x = 0x345678L;
|
x = 0x345678L;
|
||||||
p = v->ob_item;
|
p = v->ob_item;
|
||||||
while (--len >= 0) {
|
while (--len >= 0) {
|
||||||
y = PyObject_Hash(*p++);
|
y = PyObject_Hash(*p++);
|
||||||
if (y == -1)
|
if (y == -1)
|
||||||
return -1;
|
return -1;
|
||||||
x = (1000003*x) ^ y;
|
x = (x ^ y) * mult;
|
||||||
|
mult += 69068L + len + len;
|
||||||
}
|
}
|
||||||
x ^= v->ob_size;
|
|
||||||
if (x == -1)
|
if (x == -1)
|
||||||
x = -2;
|
x = -2;
|
||||||
return x;
|
return x;
|
||||||
|
|
Loading…
Reference in New Issue