Issue #28199: Microoptimized dict resizing. Based on patch by Naoki Inada.

2016-10-29 10:50:00 +03:00 · 2016-10-29 10:50:00 +03:00 · 041794908b
parent 43a5c1c9d3 d76d8bfee1
commit 041794908b
1 changed files with 63 additions and 60 deletions
--- a/Objects/dictobject.c
+++ b/Objects/dictobject.c
@ -1196,41 +1196,21 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
 }
 /*
-Internal routine used by dictresize() to insert an item which is
+Internal routine used by dictresize() to buid a hashtable of entries.
 known to be absent from the dict.  This routine also assumes that
 the dict contains no deleted entries.  Besides the performance benefit,
 using insertdict() in dictresize() is dangerous (SF bug #1456209).
 Note that no refcounts are changed by this routine; if needed, the caller
 is responsible for incref'ing `key` and `value`.
 Neither mp->ma_used nor k->dk_usable are modified by this routine; the caller
 must set them correctly
 */
 static void
-insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash,
+build_indices(PyDictKeysObject *keys, PyDictKeyEntry *ep, Py_ssize_t n)
                 PyObject *value)
 {
-    size_t i;
+    size_t mask = (size_t)DK_SIZE(keys) - 1;
-    PyDictKeysObject *k = mp->ma_keys;
+    for (Py_ssize_t ix = 0; ix != n; ix++, ep++) {
-    size_t mask = (size_t)DK_SIZE(k)-1;
+        Py_hash_t hash = ep->me_hash;
-    PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
+        size_t i = hash & mask;
-    PyDictKeyEntry *ep;
+        for (size_t perturb = hash; dk_get_index(keys, i) != DKIX_EMPTY;) {
    assert(k->dk_lookup != NULL);
    assert(value != NULL);
    assert(key != NULL);
    assert(PyUnicode_CheckExact(key) || k->dk_lookup == lookdict);
    i = hash & mask;
    for (size_t perturb = hash; dk_get_index(k, i) != DKIX_EMPTY;) {
            perturb >>= PERTURB_SHIFT;
            i = mask & ((i << 2) + i + perturb + 1);
        }
-    ep = &ep0[k->dk_nentries];
+        dk_set_index(keys, i, ix);
-    assert(ep->me_value == NULL);
+    }
    dk_set_index(k, i, k->dk_nentries);
    k->dk_nentries++;
    ep->me_key = key;
    ep->me_hash = hash;
    ep->me_value = value;
 }
 /*
@ -1246,10 +1226,10 @@ but can be resplit by make_keys_shared().
 static int
 dictresize(PyDictObject *mp, Py_ssize_t minused)
 {
-    Py_ssize_t i, newsize;
+    Py_ssize_t newsize, numentries;
    PyDictKeysObject *oldkeys;
    PyObject **oldvalues;
-    PyDictKeyEntry *ep0;
+    PyDictKeyEntry *oldentries, *newentries;
    /* Find the smallest table size > minused. */
    for (newsize = PyDict_MINSIZE;
@ -1260,8 +1240,14 @@ dictresize(PyDictObject *mp, Py_ssize_t minused)
        PyErr_NoMemory();
        return -1;
    }
    oldkeys = mp->ma_keys;
-    oldvalues = mp->ma_values;
+
    /* NOTE: Current odict checks mp->ma_keys to detect resize happen.
     * So we can't reuse oldkeys even if oldkeys->dk_size == newsize.
     * TODO: Try reusing oldkeys when reimplement odict.
     */
    /* Allocate a new table. */
    mp->ma_keys = new_keys_object(newsize);
    if (mp->ma_keys == NULL) {
@ -1270,42 +1256,59 @@ dictresize(PyDictObject *mp, Py_ssize_t minused)
    }
    if (oldkeys->dk_lookup == lookdict)
        mp->ma_keys->dk_lookup = lookdict;
-    mp->ma_values = NULL;
+
-    ep0 = DK_ENTRIES(oldkeys);
+    numentries = mp->ma_used;
-    /* Main loop below assumes we can transfer refcount to new keys
+    oldentries = DK_ENTRIES(oldkeys);
-     * and that value is stored in me_value.
+    newentries = DK_ENTRIES(mp->ma_keys);
-     * Increment ref-counts and copy values here to compensate
+    oldvalues = mp->ma_values;
     * This (resizing a split table) should be relatively rare */
    if (oldvalues != NULL) {
-        for (i = 0; i < oldkeys->dk_nentries; i++) {
+        /* Convert split table into new combined table.
-            if (oldvalues[i] != NULL) {
+         * We must incref keys; we can transfer values.
-                Py_INCREF(ep0[i].me_key);
+         * Note that values of split table is always dense.
-                ep0[i].me_value = oldvalues[i];
+         */
        for (Py_ssize_t i = 0; i < numentries; i++) {
            assert(oldvalues[i] != NULL);
            PyDictKeyEntry *ep = &oldentries[i];
            PyObject *key = ep->me_key;
            Py_INCREF(key);
            newentries[i].me_key = key;
            newentries[i].me_hash = ep->me_hash;
            newentries[i].me_value = oldvalues[i];
        }
-        }
+
    }
    /* Main loop */
    for (i = 0; i < oldkeys->dk_nentries; i++) {
        PyDictKeyEntry *ep = &ep0[i];
        if (ep->me_value != NULL) {
            insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
        }
    }
    mp->ma_keys->dk_usable -= mp->ma_used;
    if (oldvalues != NULL) {
        /* NULL out me_value slot in oldkeys, in case it was shared */
        for (i = 0; i < oldkeys->dk_nentries; i++)
            ep0[i].me_value = NULL;
        DK_DECREF(oldkeys);
        mp->ma_values = NULL;
        if (oldvalues != empty_values) {
            free_values(oldvalues);
        }
    }
    else {  // combined table.
        if (oldkeys->dk_nentries == numentries) {
            memcpy(newentries, oldentries, numentries * sizeof(PyDictKeyEntry));
        }
        else {
            PyDictKeyEntry *ep = oldentries;
            for (Py_ssize_t i = 0; i < numentries; i++) {
                while (ep->me_value == NULL)
                    ep++;
                newentries[i] = *ep++;
            }
        }
        assert(oldkeys->dk_lookup != lookdict_split);
        assert(oldkeys->dk_refcnt == 1);
        if (oldkeys->dk_size == PyDict_MINSIZE &&
            numfreekeys < PyDict_MAXFREELIST) {
            DK_DEBUG_DECREF keys_free_list[numfreekeys++] = oldkeys;
        }
        else {
            DK_DEBUG_DECREF PyObject_FREE(oldkeys);
        }
    }
    build_indices(mp->ma_keys, newentries, numentries);
    mp->ma_keys->dk_usable -= numentries;
    mp->ma_keys->dk_nentries = numentries;
    return 0;
 }