mirror of https://github.com/python/cpython.git
Issue #28199: Microoptimized dict resizing. Based on patch by Naoki Inada.
This commit is contained in:
commit
041794908b
|
@ -1196,41 +1196,21 @@ insertdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject *value)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
Internal routine used by dictresize() to insert an item which is
|
Internal routine used by dictresize() to build a hashtable of entries.
|
||||||
known to be absent from the dict. This routine also assumes that
|
|
||||||
the dict contains no deleted entries. Besides the performance benefit,
|
|
||||||
using insertdict() in dictresize() is dangerous (SF bug #1456209).
|
|
||||||
Note that no refcounts are changed by this routine; if needed, the caller
|
|
||||||
is responsible for incref'ing `key` and `value`.
|
|
||||||
Neither mp->ma_used nor k->dk_usable are modified by this routine; the caller
|
|
||||||
must set them correctly
|
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
insertdict_clean(PyDictObject *mp, PyObject *key, Py_hash_t hash,
|
build_indices(PyDictKeysObject *keys, PyDictKeyEntry *ep, Py_ssize_t n)
|
||||||
PyObject *value)
|
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t mask = (size_t)DK_SIZE(keys) - 1;
|
||||||
PyDictKeysObject *k = mp->ma_keys;
|
for (Py_ssize_t ix = 0; ix != n; ix++, ep++) {
|
||||||
size_t mask = (size_t)DK_SIZE(k)-1;
|
Py_hash_t hash = ep->me_hash;
|
||||||
PyDictKeyEntry *ep0 = DK_ENTRIES(mp->ma_keys);
|
size_t i = hash & mask;
|
||||||
PyDictKeyEntry *ep;
|
for (size_t perturb = hash; dk_get_index(keys, i) != DKIX_EMPTY;) {
|
||||||
|
|
||||||
assert(k->dk_lookup != NULL);
|
|
||||||
assert(value != NULL);
|
|
||||||
assert(key != NULL);
|
|
||||||
assert(PyUnicode_CheckExact(key) || k->dk_lookup == lookdict);
|
|
||||||
i = hash & mask;
|
|
||||||
for (size_t perturb = hash; dk_get_index(k, i) != DKIX_EMPTY;) {
|
|
||||||
perturb >>= PERTURB_SHIFT;
|
perturb >>= PERTURB_SHIFT;
|
||||||
i = mask & ((i << 2) + i + perturb + 1);
|
i = mask & ((i << 2) + i + perturb + 1);
|
||||||
}
|
}
|
||||||
ep = &ep0[k->dk_nentries];
|
dk_set_index(keys, i, ix);
|
||||||
assert(ep->me_value == NULL);
|
}
|
||||||
dk_set_index(k, i, k->dk_nentries);
|
|
||||||
k->dk_nentries++;
|
|
||||||
ep->me_key = key;
|
|
||||||
ep->me_hash = hash;
|
|
||||||
ep->me_value = value;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1246,10 +1226,10 @@ but can be resplit by make_keys_shared().
|
||||||
static int
|
static int
|
||||||
dictresize(PyDictObject *mp, Py_ssize_t minused)
|
dictresize(PyDictObject *mp, Py_ssize_t minused)
|
||||||
{
|
{
|
||||||
Py_ssize_t i, newsize;
|
Py_ssize_t newsize, numentries;
|
||||||
PyDictKeysObject *oldkeys;
|
PyDictKeysObject *oldkeys;
|
||||||
PyObject **oldvalues;
|
PyObject **oldvalues;
|
||||||
PyDictKeyEntry *ep0;
|
PyDictKeyEntry *oldentries, *newentries;
|
||||||
|
|
||||||
/* Find the smallest table size > minused. */
|
/* Find the smallest table size > minused. */
|
||||||
for (newsize = PyDict_MINSIZE;
|
for (newsize = PyDict_MINSIZE;
|
||||||
|
@ -1260,8 +1240,14 @@ dictresize(PyDictObject *mp, Py_ssize_t minused)
|
||||||
PyErr_NoMemory();
|
PyErr_NoMemory();
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
oldkeys = mp->ma_keys;
|
oldkeys = mp->ma_keys;
|
||||||
oldvalues = mp->ma_values;
|
|
||||||
|
/* NOTE: The current odict checks mp->ma_keys to detect that a resize happened.
|
||||||
|
* So we can't reuse oldkeys even if oldkeys->dk_size == newsize.
|
||||||
|
* TODO: Try reusing oldkeys when odict is reimplemented.
|
||||||
|
*/
|
||||||
|
|
||||||
/* Allocate a new table. */
|
/* Allocate a new table. */
|
||||||
mp->ma_keys = new_keys_object(newsize);
|
mp->ma_keys = new_keys_object(newsize);
|
||||||
if (mp->ma_keys == NULL) {
|
if (mp->ma_keys == NULL) {
|
||||||
|
@ -1270,42 +1256,59 @@ dictresize(PyDictObject *mp, Py_ssize_t minused)
|
||||||
}
|
}
|
||||||
if (oldkeys->dk_lookup == lookdict)
|
if (oldkeys->dk_lookup == lookdict)
|
||||||
mp->ma_keys->dk_lookup = lookdict;
|
mp->ma_keys->dk_lookup = lookdict;
|
||||||
mp->ma_values = NULL;
|
|
||||||
ep0 = DK_ENTRIES(oldkeys);
|
numentries = mp->ma_used;
|
||||||
/* Main loop below assumes we can transfer refcount to new keys
|
oldentries = DK_ENTRIES(oldkeys);
|
||||||
* and that value is stored in me_value.
|
newentries = DK_ENTRIES(mp->ma_keys);
|
||||||
* Increment ref-counts and copy values here to compensate
|
oldvalues = mp->ma_values;
|
||||||
* This (resizing a split table) should be relatively rare */
|
|
||||||
if (oldvalues != NULL) {
|
if (oldvalues != NULL) {
|
||||||
for (i = 0; i < oldkeys->dk_nentries; i++) {
|
/* Convert split table into new combined table.
|
||||||
if (oldvalues[i] != NULL) {
|
* We must incref keys; we can transfer values.
|
||||||
Py_INCREF(ep0[i].me_key);
|
* Note that the values of a split table are always dense.
|
||||||
ep0[i].me_value = oldvalues[i];
|
*/
|
||||||
|
for (Py_ssize_t i = 0; i < numentries; i++) {
|
||||||
|
assert(oldvalues[i] != NULL);
|
||||||
|
PyDictKeyEntry *ep = &oldentries[i];
|
||||||
|
PyObject *key = ep->me_key;
|
||||||
|
Py_INCREF(key);
|
||||||
|
newentries[i].me_key = key;
|
||||||
|
newentries[i].me_hash = ep->me_hash;
|
||||||
|
newentries[i].me_value = oldvalues[i];
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
/* Main loop */
|
|
||||||
for (i = 0; i < oldkeys->dk_nentries; i++) {
|
|
||||||
PyDictKeyEntry *ep = &ep0[i];
|
|
||||||
if (ep->me_value != NULL) {
|
|
||||||
insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
mp->ma_keys->dk_usable -= mp->ma_used;
|
|
||||||
if (oldvalues != NULL) {
|
|
||||||
/* NULL out me_value slot in oldkeys, in case it was shared */
|
|
||||||
for (i = 0; i < oldkeys->dk_nentries; i++)
|
|
||||||
ep0[i].me_value = NULL;
|
|
||||||
DK_DECREF(oldkeys);
|
DK_DECREF(oldkeys);
|
||||||
|
mp->ma_values = NULL;
|
||||||
if (oldvalues != empty_values) {
|
if (oldvalues != empty_values) {
|
||||||
free_values(oldvalues);
|
free_values(oldvalues);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else { // combined table.
|
||||||
|
if (oldkeys->dk_nentries == numentries) {
|
||||||
|
memcpy(newentries, oldentries, numentries * sizeof(PyDictKeyEntry));
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
|
PyDictKeyEntry *ep = oldentries;
|
||||||
|
for (Py_ssize_t i = 0; i < numentries; i++) {
|
||||||
|
while (ep->me_value == NULL)
|
||||||
|
ep++;
|
||||||
|
newentries[i] = *ep++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
assert(oldkeys->dk_lookup != lookdict_split);
|
assert(oldkeys->dk_lookup != lookdict_split);
|
||||||
assert(oldkeys->dk_refcnt == 1);
|
assert(oldkeys->dk_refcnt == 1);
|
||||||
|
if (oldkeys->dk_size == PyDict_MINSIZE &&
|
||||||
|
numfreekeys < PyDict_MAXFREELIST) {
|
||||||
|
DK_DEBUG_DECREF keys_free_list[numfreekeys++] = oldkeys;
|
||||||
|
}
|
||||||
|
else {
|
||||||
DK_DEBUG_DECREF PyObject_FREE(oldkeys);
|
DK_DEBUG_DECREF PyObject_FREE(oldkeys);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
build_indices(mp->ma_keys, newentries, numentries);
|
||||||
|
mp->ma_keys->dk_usable -= numentries;
|
||||||
|
mp->ma_keys->dk_nentries = numentries;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue