// cedar -- C++ implementation of Efficiently-updatable Double ARray trie // $Id: cedarpp.h 1916 2017-07-12 07:30:56Z ynaga $ // Copyright (c) 2009-2015 Naoki Yoshinaga /* * Copyright (C) 2023, KylinSoft Co., Ltd. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * */ #ifndef CEDAR_H #define CEDAR_H #include #include #include #include #include #ifdef HAVE_CONFIG_H #include "config.h" #endif #define STATIC_ASSERT(e, msg) typedef char msg[(e) ? 1 : -1] namespace cedar { // typedefs #if LONG_BIT == 64 typedef unsigned long npos_t; // possibly compatible with size_t #else typedef unsigned long long npos_t; #endif typedef unsigned char uchar; static const npos_t TAIL_OFFSET_MASK = static_cast (0xffffffff); static const npos_t NODE_INDEX_MASK = static_cast (0xffffffff) << 32; template struct NaN { enum { N1 = -1, N2 = -2 }; }; template <> struct NaN { enum { N1 = 0x7f800001, N2 = 0x7f800002 }; }; static const int MAX_ALLOC_SIZE = 1 << 16; // must be divisible by 256 // dynamic double array template ::N1, const int NO_PATH = NaN ::N2, const bool ORDERED = true, const int MAX_TRIAL = 1, const size_t NUM_TRACKING_NODES = 0> class da { public: enum error_code { CEDAR_NO_VALUE = NO_VALUE, CEDAR_NO_PATH = NO_PATH }; typedef value_type result_type; struct result_pair_type { value_type value; size_t length; // prefix length }; struct result_triple_type { // for predict () value_type value; size_t length; // suffix length npos_t id; // node id of value }; struct node { union { int base; value_type value; }; // negative means prev empty index int check; // negative means next empty index node (const int base_ = 0, const int check_ = 0) : base (base_), check (check_) {} }; struct ninfo { // x1.5 update speed; +.25 % memory (8n -> 10n) uchar sibling; // right sibling (= 0 if not exist) uchar child; // first child ninfo () : sibling (0), child (0) {} }; struct block { // a block w/ 256 elements int prev; // prev block; 3 bytes int next; // next block; 3 bytes short num; // # empty elements; 0 - 256 short reject; // minimum # branching failed to locate; soft limit int trial; // # trial int ehead; // first empty item block () : prev (0), next (0), num (256), reject (257), trial (0), ehead (0) {} }; da () : tracking_node (), _array (0), _tail (0), _tail0 (0), _ninfo (0), _block (0), _bheadF (0), _bheadC (0), _bheadO (0), _capacity (0), _size (0), _quota (0), _quota0 (0), _no_delete (false), _reject () { #pragma GCC diagnostic ignored "-Wunused-local-typedefs" STATIC_ASSERT(sizeof (value_type) <= sizeof (int), value_type_is_not_supported___maintain_a_value_array_by_yourself_and_store_its_index_to_trie ); #pragma GCC diagnostic warning "-Wunused-local-typedefs" _initialize (); } ~da () { clear (false); } size_t capacity () const { return static_cast (_capacity); } size_t size () const { return static_cast (_size); } size_t length () const { return static_cast (*_length); } size_t total_size () const { return sizeof (node) * _size; } size_t unit_size () const { return sizeof (node); } size_t nonzero_size () const { size_t i = 0; for (int to = 0; to < _size; ++to) if (_array[to].check >= 0) ++i; return i; } size_t nonzero_length () const { size_t i (0), j (0); for (int to = 0; to < _size; ++to) { const node& n = _array[to]; if (n.check >= 0 && _array[n.check].base != to && n.base < 0) { ++j; for (const char* p = &_tail[-n.base]; *p; ++p) ++i; } } return i + j * (1 + sizeof (value_type)); } size_t num_keys () const { size_t i = 0; for (int to = 0; to < _size; ++to) { const node& n = _array[to]; if (n.check >= 0 && (_array[n.check].base == to || n.base < 0)) ++i; } return i; } // interfance template T exactMatchSearch (const char* key) const { return exactMatchSearch (key, std::strlen (key)); } template T exactMatchSearch (const char* key, size_t len, npos_t from = 0) const { union { int i; value_type x; } b; size_t pos = 0; b.i = _find (key, from, pos, len); if (b.i == CEDAR_NO_PATH) b.i = CEDAR_NO_VALUE; T result; _set_result (&result, b.x, len, from); return result; } template size_t commonPrefixSearch (const char* key, T* result, size_t result_len) const { return commonPrefixSearch (key, result, result_len, std::strlen (key)); } template size_t commonPrefixSearch (const char* key, T* result, size_t result_len, size_t len, npos_t from = 0) const { size_t num = 0; for (size_t pos = 0; pos < len; ) { union { int i; value_type x; } b; b.i = _find (key, from, pos, pos + 1); if (b.i == CEDAR_NO_VALUE) continue; if (b.i == CEDAR_NO_PATH) return num; if (num < result_len) _set_result (&result[num], b.x, pos, from); ++num; } return num; } // predict key from double array template size_t commonPrefixPredict (const char* key, T* result, size_t result_len) { return commonPrefixPredict (key, result, result_len, std::strlen (key)); } template size_t commonPrefixPredict (const char* key, T* result, size_t result_len, size_t len, npos_t from = 0) { size_t num (0), pos (0), p (0); if (_find (key, from, pos, len) == CEDAR_NO_PATH) return 0; union { int i; value_type x; } b; const npos_t root = from; for (b.i = begin (from, p); b.i != CEDAR_NO_PATH; b.i = next (from, p, root)) { if (num < result_len) _set_result (&result[num], b.x, p, from); ++num; } return num; } void suffix (char* key, size_t len, npos_t to) const { key[len] = '\0'; if (const int offset = static_cast (to >> 32)) { to &= TAIL_OFFSET_MASK; size_t len_tail = std::strlen (&_tail[-_array[to].base]); if (len > len_tail) len -= len_tail; else len_tail = len, len = 0; std::memcpy (&key[len], &_tail[static_cast (offset) - len_tail], len_tail); } while (len--) { const int from = _array[to].check; key[len] = static_cast (_array[from].base ^ static_cast (to)); to = static_cast (from); } } value_type traverse (const char* key, npos_t& from, size_t& pos) const { return traverse (key, from, pos, std::strlen (key)); } value_type traverse (const char* key, npos_t& from, size_t& pos, size_t len) const { union { int i; value_type x; } b; b.i = _find (key, from, pos, len); return b.x; } struct empty_callback { void operator () (const int, const int) {} }; // dummy empty function value_type& update (const char* key) { return update (key, std::strlen (key)); } value_type& update (const char* key, size_t len, value_type val = value_type (0)) { npos_t from (0); size_t pos (0); return update (key, from, pos, len, val); } value_type& update (const char* key, npos_t& from, size_t& pos, size_t len, value_type val = value_type (0)) { empty_callback cf; return update (key, from, pos, len, val, cf); } template value_type& update (const char* key, npos_t& from, size_t& pos, size_t len, value_type val, T& cf) { if (! len && ! from) _err (__FILE__, __LINE__, "failed to insert zero-length key\n"); #ifndef USE_FAST_LOAD if (! _ninfo || ! _block) restore (); #endif npos_t offset = from >> 32; if (! offset) { // node on trie for (const uchar* const key_ = reinterpret_cast (key); _array[from].base >= 0; ++pos) { if (pos == len) // could be reduced { const int to = _follow (from, 0, cf); return _array[to].value += val; } from = static_cast (_follow (from, key_[pos], cf)); } offset = static_cast (-_array[from].base); } if (offset >= sizeof (int)) { // go to _tail const size_t pos_orig = pos; char* const tail = &_tail[offset] - pos; while (pos < len && key[pos] == tail[pos]) ++pos; // if (pos == len && tail[pos] == '\0') { // found exact key if (const npos_t moved = pos - pos_orig) { // search end on tail from &= TAIL_OFFSET_MASK; from |= (offset + moved) << 32; } return *reinterpret_cast (&tail[len + 1]) += val; } // otherwise, insert the common prefix in tail if any if (from >> 32) { from &= TAIL_OFFSET_MASK; // reset to update tail offset for (npos_t offset_ = static_cast (-_array[from].base); offset_ < offset; ) { from = static_cast (_follow (from, static_cast (_tail[offset_]), cf)); ++offset_; // this shows intricacy in debugging updatable double array trie if (NUM_TRACKING_NODES) // keep the traversed node (on tail) updated for (size_t j = 0; tracking_node[j] != 0; ++j) if (tracking_node[j] >> 32 == offset_) tracking_node[j] = static_cast (from); } } for (size_t pos_ = pos_orig; pos_ < pos; ++pos_) from = static_cast (_follow (from, static_cast (key[pos_]), cf)); npos_t moved = pos - pos_orig; if (tail[pos]) { // remember to move offset to existing tail const int to_ = _follow (from, static_cast (tail[pos]), cf); _array[to_].base = - static_cast (offset + ++moved); moved -= 1 + sizeof (value_type); // keep record } moved += offset; for (npos_t i = offset; i <= moved; i += 1 + sizeof (value_type)) { if (_quota0 == ++*_length0) { #ifdef USE_EXACT_FIT _quota0 += *_length0 >= MAX_ALLOC_SIZE ? MAX_ALLOC_SIZE : *_length0; #else _quota0 += _quota0; #endif _realloc_array (_tail0, _quota0, *_length0); } _tail0[*_length0] = static_cast (i); } if (pos == len || tail[pos] == '\0') { const int to = _follow (from, 0, cf); // could be reduced if (pos == len) return _array[to].value += val; // set value on trie _array[to].value += *reinterpret_cast (&tail[pos + 1]); } from = static_cast (_follow (from, static_cast (key[pos]), cf)); ++pos; } const int needed = static_cast (len - pos + 1 + sizeof (value_type)); if (pos == len && *_length0) { // reuse const int offset0 = _tail0[*_length0]; _tail[offset0] = '\0'; _array[from].base = -offset0; --*_length0; return *reinterpret_cast (&_tail[offset0 + 1]) = val; } if (_quota < *_length + needed) { #ifdef USE_EXACT_FIT _quota += needed > *_length || needed > MAX_ALLOC_SIZE ? needed : (*_length >= MAX_ALLOC_SIZE ? MAX_ALLOC_SIZE : *_length); #else _quota += _quota >= needed ? _quota : needed; #endif _realloc_array (_tail, _quota, *_length); } _array[from].base = -*_length; const size_t pos_orig = pos; char* const tail = &_tail[*_length] - pos; if (pos < len) { do tail[pos] = key[pos]; while (++pos < len); from |= (static_cast (*_length) + (len - pos_orig)) << 32; } *_length += needed; return *reinterpret_cast (&tail[len + 1]) += val; } // easy-going erase () without compression int erase (const char* key) { return erase (key, std::strlen (key)); } int erase (const char* key, size_t len, npos_t from = 0) { size_t pos = 0; const int i = _find (key, from, pos, len); if (i == CEDAR_NO_PATH || i == CEDAR_NO_VALUE) return -1; if (from >> 32) from &= TAIL_OFFSET_MASK; // leave tail as is bool flag = _array[from].base < 0; // have sibling int e = flag ? static_cast (from) : _array[from].base ^ 0; from = _array[e].check; do { const node& n = _array[from]; flag = _ninfo[n.base ^ _ninfo[from].child].sibling; if (flag) _pop_sibling (from, n.base, static_cast (n.base ^ e)); _push_enode (e); e = static_cast (from); from = static_cast (_array[from].check); } while (! flag); return 0; } int build (size_t num, const char** key, const size_t* len = 0, const value_type* val = 0) { for (size_t i = 0; i < num; ++i) update (key[i], len ? len[i] : std::strlen (key[i]), val ? val[i] : value_type (i)); return 0; } template void dump (T* result, const size_t result_len) { union { int i; value_type x; } b; size_t num (0), p (0); npos_t from = 0; for (b.i = begin (from, p); b.i != CEDAR_NO_PATH; b.i = next (from, p)) if (num < result_len) _set_result (&result[num++], b.x, p, from); else _err (__FILE__, __LINE__, "dump() needs array of length = num_keys()\n"); } void shrink_tail () { union { char* tail; int* length; } t; const size_t length_ = static_cast (*_length) - static_cast (*_length0) * (1 + sizeof (value_type)); t.tail = static_cast (std::malloc (length_)); if (! t.tail) _err (__FILE__, __LINE__, "memory allocation failed\n"); *t.length = static_cast (sizeof (int)); for (int to = 0; to < _size; ++to) { node& n = _array[to]; if (n.check >= 0 && _array[n.check].base != to && n.base < 0) { char* const tail (&t.tail[*t.length]), * const tail_ (&_tail[-n.base]); n.base = - *t.length; int i = 0; do tail[i] = tail_[i]; while (tail[i++]); *reinterpret_cast (&tail[i]) = *reinterpret_cast (&tail_[i]); *t.length += i + static_cast (sizeof (value_type)); } } std::free (_tail); _tail = t.tail; _realloc_array (_tail, *_length, *_length); _quota = *_length; _realloc_array (_tail0, 1); _quota0 = 1; } int save (const char* fn, const char* mode, const bool shrink) { if (shrink) shrink_tail (); return save (fn, mode); } int save (const char* fn, const char* mode = "wb") const { // _test (); FILE* fp = std::fopen (fn, mode); if (! fp) return -1; std::fwrite (_tail, sizeof (char), static_cast (*_length), fp); std::fwrite (_array, sizeof (node), static_cast (_size), fp); std::fclose (fp); #ifdef USE_FAST_LOAD const char* const info = std::strcat (std::strcpy (new char[std::strlen (fn) + 5], fn), ".sbl"); fp = std::fopen (info, mode); delete [] info; // resolve memory leak if (! fp) return -1; std::fwrite (&_bheadF, sizeof (int), 1, fp); std::fwrite (&_bheadC, sizeof (int), 1, fp); std::fwrite (&_bheadO, sizeof (int), 1, fp); std::fwrite (_ninfo, sizeof (ninfo), static_cast (_size), fp); std::fwrite (_block, sizeof (block), static_cast (_size >> 8), fp); std::fclose (fp); #endif return 0; } int open (const char* fn, const char* mode = "rb", const size_t offset = 0, size_t size_ = 0) { FILE* fp = std::fopen (fn, mode); if (! fp) return -1; // get size if (! size_) { if (std::fseek (fp, 0, SEEK_END) != 0) return -1; size_ = static_cast (std::ftell (fp)); if (std::fseek (fp, 0, SEEK_SET) != 0) return -1; } if (size_ <= offset) return -1; if (std::fseek (fp, static_cast (offset), SEEK_SET) != 0) return -1; int len = 0; if (std::fread (&len, sizeof (int), 1, fp) != 1) return -1; const size_t length_ = static_cast (len); if (size_ <= offset + length_) return -1; // set array clear (false); size_ = (size_ - offset - length_) / sizeof (node); _array = static_cast (std::malloc (sizeof (node) * size_)); _tail = static_cast (std::malloc (length_)); _tail0 = static_cast (std::malloc (sizeof (int))); #ifdef USE_FAST_LOAD _ninfo = static_cast (std::malloc (sizeof (ninfo) * size_)); _block = static_cast (std::malloc (sizeof (block) * size_)); if (! _array || ! _tail || ! _tail0 || ! _ninfo || ! _block) #else if (! _array || ! _tail || ! _tail0) #endif _err (__FILE__, __LINE__, "memory allocation failed\n"); if (std::fseek (fp, static_cast (offset), SEEK_SET) != 0) return -1; if (length_ != std::fread (_tail, sizeof (char), length_, fp) || size_ != std::fread (_array, sizeof (node), size_, fp)) return -1; std::fclose (fp); _size = static_cast (size_); *_length0 = 0; #ifdef USE_FAST_LOAD const char* const info = std::strcat (std::strcpy (new char[std::strlen (fn) + 5], fn), ".sbl"); fp = std::fopen (info, mode); delete [] info; // resolve memory leak if (! fp) return -1; std::fread (&_bheadF, sizeof (int), 1, fp); std::fread (&_bheadC, sizeof (int), 1, fp); std::fread (&_bheadO, sizeof (int), 1, fp); if (size_ != std::fread (_ninfo, sizeof (ninfo), size_, fp) || size_ >> 8 != std::fread (_block, sizeof (block), size_ >> 8, fp)) return -1; std::fclose (fp); _capacity = _size; _quota = *_length; _quota0 = 1; #endif return 0; } #ifndef USE_FAST_LOAD void restore () { // restore information to update if (! _block) _restore_block (); if (! _ninfo) _restore_ninfo (); _capacity = _size; _quota = *_length; _quota0 = 1; } #endif void set_array (void* p, size_t size_ = 0) { // ad-hoc clear (false); if (size_) size_ = size_ * unit_size () - static_cast (*static_cast (p)); _tail = static_cast (p); _array = reinterpret_cast (_tail + *_length); _size = static_cast (size_ / unit_size () + (size_ % unit_size () ? 1 : 0)); _no_delete = true; } const void* array () const { return _array; } void clear (const bool reuse = true) { if (_no_delete) _array = 0, _tail = 0; if (_array) std::free (_array); if (_tail) std::free (_tail); if (_tail0) std::free (_tail0); if (_ninfo) std::free (_ninfo); if (_block) std::free (_block); _array = 0; _tail = 0; _tail0 = 0; _ninfo = 0; _block = 0; _bheadF = _bheadC = _bheadO = _capacity = _size = _quota = _quota0 = 0; if (reuse) _initialize (); _no_delete = false; } // return the first child for a tree rooted by a given node int begin (npos_t& from, size_t& len) { #ifndef USE_FAST_LOAD if (! _ninfo) _restore_ninfo (); #endif int base = from >> 32 ? - static_cast (from >> 32) : _array[from].base; if (base >= 0) { // on trie uchar c = _ninfo[from].child; if (! from && ! (c = _ninfo[base ^ c].sibling)) // bug fix return CEDAR_NO_PATH; // no entry for (; c && base >= 0; ++len) { from = static_cast (base) ^ c; base = _array[from].base; c = _ninfo[from].child; } if (base >= 0) return _array[base ^ c].base; } const size_t len_ = std::strlen (&_tail[-base]); from &= TAIL_OFFSET_MASK; from |= static_cast (static_cast (-base) + len_) << 32; len += len_; return *reinterpret_cast (&_tail[-base] + len_ + 1); } // return the next child if any int next (npos_t& from, size_t& len, const npos_t root = 0) { uchar c = 0; if (const int offset = static_cast (from >> 32)) { // on tail if (root >> 32) return CEDAR_NO_PATH; from &= TAIL_OFFSET_MASK; len -= static_cast (offset - (-_array[from].base)); } else c = _ninfo[_array[from].base ^ 0].sibling; for (; ! c && from != root; --len) { c = _ninfo[from].sibling; from = static_cast (_array[from].check); } if (! c) return CEDAR_NO_PATH; return begin (from = static_cast (_array[from].base) ^ c, ++len); } npos_t tracking_node[NUM_TRACKING_NODES + 1]; private: // currently disabled; implement these if you need da (const da&); da& operator= (const da&); node* _array; union { char* _tail; int* _length; }; union { int* _tail0; int* _length0; }; ninfo* _ninfo; block* _block; int _bheadF; // first block of Full; 0 int _bheadC; // first block of Closed; 0 if no Closed int _bheadO; // first block of Open; 0 if no Open int _capacity; int _size; int _quota; int _quota0; int _no_delete; short _reject[257]; // static void _err (const char* fn, const int ln, const char* msg) { std::fprintf (stderr, "cedar: %s [%d]: %s", fn, ln, msg); std::exit (1); } template static void _realloc_array (T*& p, const int size_n, const int size_p = 0) { void* tmp = std::realloc (p, sizeof (T) * static_cast (size_n)); if (! tmp) std::free (p), _err (__FILE__, __LINE__, "memory reallocation failed\n"); p = static_cast (tmp); static const T T0 = T (); for (T* q (p + size_p), * const r (p + size_n); q != r; ++q) *q = T0; } void _initialize () { // initilize the first special block _realloc_array (_array, 256, 256); _realloc_array (_tail, sizeof (int)); _realloc_array (_tail0, 1); _realloc_array (_ninfo, 256); _realloc_array (_block, 1); _array[0] = node (0, -1); for (int i = 1; i < 256; ++i) _array[i] = node (i == 1 ? -255 : - (i - 1), i == 255 ? -1 : - (i + 1)); _capacity = _size = 256; _block[0].ehead = 1; // bug fix for erase _quota = *_length = static_cast (sizeof (int)); _quota0 = 1; for (size_t i = 0 ; i <= NUM_TRACKING_NODES; ++i) tracking_node[i] = 0; for (short i = 0; i <= 256; ++i) _reject[i] = i + 1; } // follow/create edge template int _follow (npos_t& from, const uchar& label, T& cf) { int to = 0; const int base = _array[from].base; if (base < 0 || _array[to = base ^ label].check < 0) { to = _pop_enode (base, label, static_cast (from)); _push_sibling (from, to ^ label, label, base >= 0); } else if (_array[to].check != static_cast (from)) to = _resolve (from, base, label, cf); return to; } // find key from double array int _find (const char* key, npos_t& from, size_t& pos, const size_t len) const { npos_t offset = from >> 32; if (! offset) { // node on trie for (const uchar* const key_ = reinterpret_cast (key); _array[from].base >= 0; ) { if (pos == len) { const node& n = _array[_array[from].base ^ 0]; if (n.check != static_cast (from)) return CEDAR_NO_VALUE; return n.base; } size_t to = static_cast (_array[from].base); to ^= key_[pos]; if (_array[to].check != static_cast (from)) return CEDAR_NO_PATH; ++pos; from = to; } offset = static_cast (-_array[from].base); } // switch to _tail to match suffix const size_t pos_orig = pos; // start position in reading _tail const char* const tail = &_tail[offset] - pos; if (pos < len) { do if (key[pos] != tail[pos]) break; while (++pos < len); if (const npos_t moved = pos - pos_orig) { from &= TAIL_OFFSET_MASK; from |= (offset + moved) << 32; } if (pos < len) return CEDAR_NO_PATH; // input > tail, input != tail } if (tail[pos]) return CEDAR_NO_VALUE; // input < tail return *reinterpret_cast (&tail[len + 1]); } #ifndef USE_FAST_LOAD void _restore_ninfo () { _realloc_array (_ninfo, _size); for (int to = 0; to < _size; ++to) { const int from = _array[to].check; if (from < 0) continue; // skip empty node const int base = _array[from].base; if (const uchar label = static_cast (base ^ to)) // skip leaf _push_sibling (static_cast (from), base, label, ! from || _ninfo[from].child || _array[base ^ 0].check == from); } } void _restore_block () { _realloc_array (_block, _size >> 8); _bheadF = _bheadC = _bheadO = 0; for (int bi (0), e (0); e < _size; ++bi) { // register blocks to full block& b = _block[bi]; b.num = 0; for (; e < (bi << 8) + 256; ++e) if (_array[e].check < 0 && ++b.num == 1) b.ehead = e; int& head_out = b.num == 1 ? _bheadC : (b.num == 0 ? _bheadF : _bheadO); _push_block (bi, head_out, ! head_out && b.num); } } #endif void _set_result (result_type* x, value_type r, size_t = 0, npos_t = 0) const { *x = r; } void _set_result (result_pair_type* x, value_type r, size_t l, npos_t = 0) const { x->value = r; x->length = l; } void _set_result (result_triple_type* x, value_type r, size_t l, npos_t from) const { x->value = r; x->length = l; x->id = from; } void _pop_block (const int bi, int& head_in, const bool last) { if (last) { // last one poped; Closed or Open head_in = 0; } else { const block& b = _block[bi]; _block[b.prev].next = b.next; _block[b.next].prev = b.prev; if (bi == head_in) head_in = b.next; } } void _push_block (const int bi, int& head_out, const bool empty) { block& b = _block[bi]; if (empty) { // the destination is empty head_out = b.prev = b.next = bi; } else { // use most recently pushed int& tail_out = _block[head_out].prev; b.prev = tail_out; b.next = head_out; head_out = tail_out = _block[tail_out].next = bi; } } int _add_block () { if (_size == _capacity) { // allocate memory if needed #ifdef USE_EXACT_FIT _capacity += _size >= MAX_ALLOC_SIZE ? MAX_ALLOC_SIZE : _size; #else _capacity += _capacity; #endif _realloc_array (_array, _capacity, _capacity); _realloc_array (_ninfo, _capacity, _size); _realloc_array (_block, _capacity >> 8, _size >> 8); } _block[_size >> 8].ehead = _size; _array[_size] = node (- (_size + 255), - (_size + 1)); for (int i = _size + 1; i < _size + 255; ++i) _array[i] = node (-(i - 1), -(i + 1)); _array[_size + 255] = node (- (_size + 254), -_size); _push_block (_size >> 8, _bheadO, ! _bheadO); // append to block Open _size += 256; return (_size >> 8) - 1; } // transfer block from one start w/ head_in to one start w/ head_out void _transfer_block (const int bi, int& head_in, int& head_out) { _pop_block (bi, head_in, bi == _block[bi].next); _push_block (bi, head_out, ! head_out && _block[bi].num); } // pop empty node from block; never transfer the special block (bi = 0) int _pop_enode (const int base, const uchar label, const int from) { const int e = base < 0 ? _find_place () : base ^ label; const int bi = e >> 8; node& n = _array[e]; block& b = _block[bi]; if (--b.num == 0) { if (bi) _transfer_block (bi, _bheadC, _bheadF); // Closed to Full } else { // release empty node from empty ring _array[-n.base].check = n.check; _array[-n.check].base = n.base; if (e == b.ehead) b.ehead = -n.check; // set ehead if (bi && b.num == 1 && b.trial != MAX_TRIAL) // Open to Closed _transfer_block (bi, _bheadO, _bheadC); } // initialize the released node if (label) n.base = -1; else n.value = value_type (0); n.check = from; if (base < 0) _array[from].base = e ^ label; return e; } // push empty node into empty ring void _push_enode (const int e) { const int bi = e >> 8; block& b = _block[bi]; if (++b.num == 1) { // Full to Closed b.ehead = e; _array[e] = node (-e, -e); if (bi) _transfer_block (bi, _bheadF, _bheadC); // Full to Closed } else { const int prev = b.ehead; const int next = -_array[prev].check; _array[e] = node (-prev, -next); _array[prev].check = _array[next].base = -e; if (b.num == 2 || b.trial == MAX_TRIAL) { // Closed to Open if (bi) _transfer_block (bi, _bheadC, _bheadO); } b.trial = 0; } if (b.reject < _reject[b.num]) b.reject = _reject[b.num]; _ninfo[e] = ninfo (); // reset ninfo; no child, no sibling } // push label to from's child void _push_sibling (const npos_t from, const int base, const uchar label, const bool flag = true) { uchar* c = &_ninfo[from].child; if (flag && (ORDERED ? label > *c : ! *c)) do c = &_ninfo[base ^ *c].sibling; while (ORDERED && *c && *c < label); _ninfo[base ^ label].sibling = *c, *c = label; } // pop label from from's child void _pop_sibling (const npos_t from, const int base, const uchar label) { uchar* c = &_ninfo[from].child; while (*c != label) c = &_ninfo[base ^ *c].sibling; *c = _ninfo[base ^ label].sibling; } // check whether to replace branching w/ the newly added node bool _consult (const int base_n, const int base_p, uchar c_n, uchar c_p) const { do if (! (c_p = _ninfo[base_p ^ c_p].sibling)) return false; while ((c_n = _ninfo[base_n ^ c_n].sibling)); return true; } // enumerate (equal to or more than one) child nodes uchar* _set_child (uchar* p, const int base, uchar c, const int label = -1) { --p; if (! c) { *++p = c; c = _ninfo[base ^ c].sibling; } // 0: terminal if (ORDERED) while (c && c < label) { *++p = c; c = _ninfo[base ^ c].sibling; } if (label != -1) *++p = static_cast (label); while (c) { *++p = c; c = _ninfo[base ^ c].sibling; } return p; } // explore new block to settle down int _find_place () { if (_bheadC) return _block[_bheadC].ehead; if (_bheadO) return _block[_bheadO].ehead; return _add_block () << 8; } int _find_place (const uchar* const first, const uchar* const last) { if (int bi = _bheadO) { const int bz = _block[_bheadO].prev; const short nc = static_cast (last - first + 1); while (1) { // set candidate block block& b = _block[bi]; if (b.num >= nc && nc < b.reject) // explore configuration for (int e = b.ehead;;) { const int base = e ^ *first; for (const uchar* p = first; _array[base ^ *++p].check < 0; ) if (p == last) return b.ehead = e; // no conflict if ((e = -_array[e].check) == b.ehead) break; } b.reject = nc; if (b.reject < _reject[b.num]) _reject[b.num] = b.reject; const int bi_ = b.next; if (++b.trial == MAX_TRIAL) _transfer_block (bi, _bheadO, _bheadC); if (bi == bz) break; bi = bi_; } } return _add_block () << 8; } // resolve conflict on base_n ^ label_n = base_p ^ label_p template int _resolve (npos_t& from_n, const int base_n, const uchar label_n, T& cf) { // examine siblings of conflicted nodes const int to_pn = base_n ^ label_n; const int from_p = _array[to_pn].check; const int base_p = _array[from_p].base; const bool flag // whether to replace siblings of newly added = _consult (base_n, base_p, _ninfo[from_n].child, _ninfo[from_p].child); uchar child[256]; uchar* const first = &child[0]; uchar* const last = flag ? _set_child (first, base_n, _ninfo[from_n].child, label_n) : _set_child (first, base_p, _ninfo[from_p].child); const int base = (first == last ? _find_place () : _find_place (first, last)) ^ *first; // replace & modify empty list const int from = flag ? static_cast (from_n) : from_p; const int base_ = flag ? base_n : base_p; if (flag && *first == label_n) _ninfo[from].child = label_n; // new child _array[from].base = base; // new base for (const uchar* p = first; p <= last; ++p) { // to_ => to const int to = _pop_enode (base, *p, from); const int to_ = base_ ^ *p; _ninfo[to].sibling = (p == last ? 0 : *(p + 1)); if (flag && to_ == to_pn) continue; // skip newcomer (no child) cf (to_, to); node& n = _array[to]; node& n_ = _array[to_]; if ((n.base = n_.base) > 0 && *p) { // copy base; bug fix uchar c = _ninfo[to].child = _ninfo[to_].child; do _array[n.base ^ c].check = to; // adjust grand son's check while ((c = _ninfo[n.base ^ c].sibling)); } if (! flag && to_ == static_cast (from_n)) // parent node moved from_n = static_cast (to); // bug fix if (! flag && to_ == to_pn) { // the address is immediately used _push_sibling (from_n, to_pn ^ label_n, label_n); _ninfo[to_].child = 0; // remember to reset child if (label_n) n_.base = -1; else n_.value = value_type (0); n_.check = static_cast (from_n); } else _push_enode (to_); if (NUM_TRACKING_NODES) // keep the traversed node updated for (size_t j = 0; tracking_node[j] != 0; ++j) { if (static_cast (tracking_node[j] & TAIL_OFFSET_MASK) == to_) { tracking_node[j] &= NODE_INDEX_MASK; tracking_node[j] |= static_cast (to); } } } return flag ? base ^ label_n : to_pn; } // test the validity of double array for debug void _test (const npos_t from = 0) const { const int base = _array[from].base; if (base < 0) { // validate tail offset assert (*_length >= static_cast (-base + 1 + sizeof (value_type))); return; } uchar c = _ninfo[from].child; do { if (from) assert (_array[base ^ c].check == static_cast (from)); if (c) _test (static_cast (base ^ c)); } while ((c = _ninfo[base ^ c].sibling)); } }; } #endif