diff --git a/Lib/test/test_tuple.py b/Lib/test/test_tuple.py index 4fe299f18261..a3f40dd07d62 100644 --- a/Lib/test/test_tuple.py +++ b/Lib/test/test_tuple.py @@ -41,6 +41,25 @@ def f(): yield i self.assertEqual(list(tuple(f())), range(1000)) + def test_hash(self): + # See SF bug 942952: Weakness in tuple hash + # The hash should: + # be non-commutative + # should spread-out closely spaced values + # should not exhibit cancellation in tuples like (x,(x,y)) + # should be distinct from element hashes: hash(x)!=hash((x,)) + # This test exercises those cases. + # For a pure random hash and N=50, the expected number of collisions + # is 7.3. Here we allow twice that number. + # Any worse and the hash function is sorely suspect. + + N=50 + base = range(N) + xp = [(i, j) for i in base for j in base] + inps = base + [(i, j) for i in base for j in xp] + \ + [(i, j) for i in xp for j in base] + xp + zip(base) + collisions = len(inps) - len(set(map(hash, inps))) + self.assert_(collisions <= 15) def test_main(): test_support.run_unittest(TupleTest) diff --git a/Misc/NEWS b/Misc/NEWS index ab705af7bdc5..2c5dfd9b95f0 100644 --- a/Misc/NEWS +++ b/Misc/NEWS @@ -12,6 +12,9 @@ What's New in Python 2.4 alpha 1? Core and builtins ----------------- +- Improved the tuple hashing algorithm to give fewer collisions in + common cases. Fixes bug #942952. + - Implemented generator expressions (PEP 289). Coded by Jiwon Seo. - Enabled the profiling of C extension functions (and builtins) - check diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c index 159dc44fd07e..4cb80f0ed50a 100644 --- a/Objects/tupleobject.c +++ b/Objects/tupleobject.c @@ -262,15 +262,16 @@ tuplehash(PyTupleObject *v) register long x, y; register int len = v->ob_size; register PyObject **p; + long mult = 1000003L; x = 0x345678L; p = v->ob_item; while (--len >= 0) { y = PyObject_Hash(*p++); if (y == -1) return -1; - x = (1000003*x) ^ y; + x = (x ^ y) * mult; + mult += 69068L + len + len; } - x ^= v->ob_size; if (x == -1) x = -2; return x;